{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999864308652585, "eval_steps": 500, "global_step": 16581, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 2.409638554216868e-08, "loss": 0.9933, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 4.819277108433736e-08, "loss": 0.9998, "step": 2 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 7.228915662650603e-08, "loss": 1.0277, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 9.638554216867472e-08, "loss": 0.979, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 1.204819277108434e-07, "loss": 1.1137, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 1.4457831325301206e-07, "loss": 1.0413, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 1.6867469879518075e-07, "loss": 1.1049, "step": 7 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 1.9277108433734944e-07, "loss": 1.0347, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 2.1686746987951808e-07, "loss": 0.9665, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 2.409638554216868e-07, "loss": 0.9178, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 2.6506024096385546e-07, "loss": 1.0499, "step": 11 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 2.891566265060241e-07, "loss": 0.9688, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 3.1325301204819284e-07, "loss": 0.9859, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 3.373493975903615e-07, "loss": 1.131, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 3.614457831325301e-07, "loss": 1.1655, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 3.855421686746989e-07, "loss": 1.0566, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 4.0963855421686754e-07, "loss": 1.0374, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 4.3373493975903615e-07, "loss": 0.9643, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 4.578313253012048e-07, "loss": 1.1707, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 4.819277108433736e-07, "loss": 1.109, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 5.060240963855422e-07, "loss": 1.0372, "step": 21 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 5.301204819277109e-07, "loss": 1.0031, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 5.542168674698796e-07, "loss": 1.0855, "step": 23 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 5.783132530120482e-07, "loss": 1.1235, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 6.024096385542169e-07, "loss": 0.9988, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 6.265060240963857e-07, "loss": 1.0514, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 6.506024096385542e-07, "loss": 0.7772, "step": 27 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 6.74698795180723e-07, "loss": 0.9607, "step": 28 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 6.987951807228917e-07, "loss": 1.0153, "step": 29 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 7.228915662650602e-07, "loss": 1.0477, "step": 30 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 7.46987951807229e-07, "loss": 0.995, "step": 31 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 7.710843373493978e-07, "loss": 0.8837, "step": 32 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 7.951807228915663e-07, "loss": 0.9292, "step": 33 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 8.192771084337351e-07, "loss": 0.9168, "step": 34 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 8.433734939759036e-07, "loss": 0.9256, "step": 35 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 8.674698795180723e-07, "loss": 1.1059, "step": 36 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 8.915662650602411e-07, "loss": 0.916, "step": 37 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 9.156626506024096e-07, "loss": 0.7855, "step": 38 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 9.397590361445784e-07, "loss": 0.8543, "step": 39 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 9.638554216867472e-07, "loss": 0.969, "step": 40 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 9.879518072289156e-07, "loss": 0.9939, "step": 41 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.0120481927710845e-06, "loss": 1.0706, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.0361445783132532e-06, "loss": 1.0133, "step": 43 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.0602409638554218e-06, "loss": 0.9533, "step": 44 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.0843373493975905e-06, "loss": 0.949, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.1084337349397592e-06, "loss": 0.7944, "step": 46 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.1325301204819278e-06, "loss": 1.0251, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.1566265060240965e-06, "loss": 0.8835, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.1807228915662651e-06, "loss": 0.9313, "step": 49 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.2048192771084338e-06, "loss": 0.9486, "step": 50 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.2289156626506025e-06, "loss": 0.7591, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.2530120481927713e-06, "loss": 0.8413, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.2771084337349398e-06, "loss": 0.8469, "step": 53 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.3012048192771085e-06, "loss": 0.9393, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.3253012048192773e-06, "loss": 0.7916, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.349397590361446e-06, "loss": 0.86, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.3734939759036144e-06, "loss": 0.8668, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.3975903614457833e-06, "loss": 0.8813, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.421686746987952e-06, "loss": 0.8715, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.4457831325301204e-06, "loss": 0.7898, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.4698795180722893e-06, "loss": 1.0144, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.493975903614458e-06, "loss": 0.9403, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.5180722891566266e-06, "loss": 0.8833, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.5421686746987955e-06, "loss": 0.9605, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.566265060240964e-06, "loss": 0.8932, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.5903614457831326e-06, "loss": 0.955, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.6144578313253013e-06, "loss": 0.9001, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.6385542168674702e-06, "loss": 0.9571, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.6626506024096386e-06, "loss": 0.8803, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.6867469879518073e-06, "loss": 0.9133, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.7108433734939762e-06, "loss": 0.9822, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.7349397590361446e-06, "loss": 0.874, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.7590361445783133e-06, "loss": 0.8813, "step": 73 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.7831325301204822e-06, "loss": 0.8852, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.8072289156626508e-06, "loss": 0.8313, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.8313253012048193e-06, "loss": 0.7643, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.8554216867469881e-06, "loss": 0.7644, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.8795180722891568e-06, "loss": 0.8166, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.9036144578313255e-06, "loss": 0.9724, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.9277108433734943e-06, "loss": 0.82, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.951807228915663e-06, "loss": 0.9413, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 1.9759036144578312e-06, "loss": 0.7816, "step": 82 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7978, "step": 83 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.024096385542169e-06, "loss": 1.0487, "step": 84 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.0481927710843377e-06, "loss": 0.8318, "step": 85 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.0722891566265063e-06, "loss": 0.8275, "step": 86 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.096385542168675e-06, "loss": 0.8034, "step": 87 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.1204819277108437e-06, "loss": 0.8443, "step": 88 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.1445783132530123e-06, "loss": 0.9385, "step": 89 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.168674698795181e-06, "loss": 0.9342, "step": 90 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.1927710843373496e-06, "loss": 0.8834, "step": 91 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.2168674698795183e-06, "loss": 0.8875, "step": 92 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.240963855421687e-06, "loss": 0.8333, "step": 93 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.2650602409638556e-06, "loss": 0.8623, "step": 94 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.2891566265060243e-06, "loss": 0.8864, "step": 95 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.313253012048193e-06, "loss": 0.8077, "step": 96 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.3373493975903616e-06, "loss": 0.9135, "step": 97 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.3614457831325303e-06, "loss": 0.9192, "step": 98 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.385542168674699e-06, "loss": 0.9304, "step": 99 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.4096385542168676e-06, "loss": 0.9121, "step": 100 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.4337349397590363e-06, "loss": 0.96, "step": 101 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.457831325301205e-06, "loss": 0.8162, "step": 102 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.4819277108433736e-06, "loss": 0.9749, "step": 103 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.5060240963855427e-06, "loss": 0.8109, "step": 104 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.530120481927711e-06, "loss": 0.9423, "step": 105 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.5542168674698796e-06, "loss": 0.8685, "step": 106 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.5783132530120487e-06, "loss": 0.9138, "step": 107 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.602409638554217e-06, "loss": 0.9509, "step": 108 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.6265060240963856e-06, "loss": 0.8399, "step": 109 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.6506024096385547e-06, "loss": 0.8667, "step": 110 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.674698795180723e-06, "loss": 0.8323, "step": 111 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.698795180722892e-06, "loss": 1.0079, "step": 112 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.7228915662650607e-06, "loss": 0.8719, "step": 113 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.746987951807229e-06, "loss": 1.0293, "step": 114 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.771084337349398e-06, "loss": 0.916, "step": 115 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.7951807228915666e-06, "loss": 0.9451, "step": 116 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.819277108433735e-06, "loss": 0.699, "step": 117 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.843373493975904e-06, "loss": 1.0088, "step": 118 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.8674698795180726e-06, "loss": 0.8041, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.891566265060241e-06, "loss": 0.9034, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.91566265060241e-06, "loss": 0.8417, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.9397590361445786e-06, "loss": 0.9009, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.9638554216867473e-06, "loss": 0.8913, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 2.987951807228916e-06, "loss": 0.8216, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.012048192771085e-06, "loss": 0.924, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.0361445783132533e-06, "loss": 0.9288, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.060240963855422e-06, "loss": 0.9092, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.084337349397591e-06, "loss": 0.8756, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.1084337349397593e-06, "loss": 0.8366, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.132530120481928e-06, "loss": 0.9329, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.156626506024096e-06, "loss": 0.7597, "step": 131 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.1807228915662653e-06, "loss": 0.7688, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.204819277108434e-06, "loss": 1.1408, "step": 133 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.2289156626506026e-06, "loss": 0.7924, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.2530120481927713e-06, "loss": 0.7714, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.2771084337349403e-06, "loss": 0.8117, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.3012048192771086e-06, "loss": 0.9411, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 3.3253012048192772e-06, "loss": 0.8468, "step": 138 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.3493975903614463e-06, "loss": 1.0887, "step": 139 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.3734939759036146e-06, "loss": 0.8172, "step": 140 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.3975903614457832e-06, "loss": 0.8325, "step": 141 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.4216867469879523e-06, "loss": 0.9149, "step": 142 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.4457831325301206e-06, "loss": 0.8252, "step": 143 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.4698795180722892e-06, "loss": 0.8476, "step": 144 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.4939759036144583e-06, "loss": 0.919, "step": 145 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.5180722891566266e-06, "loss": 0.9694, "step": 146 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.5421686746987956e-06, "loss": 0.9461, "step": 147 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.5662650602409643e-06, "loss": 0.866, "step": 148 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.5903614457831325e-06, "loss": 1.0248, "step": 149 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.6144578313253016e-06, "loss": 0.8691, "step": 150 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.6385542168674703e-06, "loss": 0.855, "step": 151 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.6626506024096385e-06, "loss": 0.8335, "step": 152 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.6867469879518076e-06, "loss": 0.8492, "step": 153 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.7108433734939763e-06, "loss": 0.7327, "step": 154 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.7349397590361445e-06, "loss": 0.9997, "step": 155 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.7590361445783136e-06, "loss": 0.8588, "step": 156 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.7831325301204823e-06, "loss": 0.7978, "step": 157 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.807228915662651e-06, "loss": 1.0544, "step": 158 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.83132530120482e-06, "loss": 0.9213, "step": 159 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.855421686746989e-06, "loss": 0.7878, "step": 160 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.879518072289157e-06, "loss": 0.9163, "step": 161 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.903614457831326e-06, "loss": 0.8848, "step": 162 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.927710843373494e-06, "loss": 0.8307, "step": 163 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.9518072289156625e-06, "loss": 0.7358, "step": 164 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 3.975903614457832e-06, "loss": 1.0139, "step": 165 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.8365, "step": 166 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.024096385542169e-06, "loss": 0.8494, "step": 167 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.048192771084338e-06, "loss": 0.9035, "step": 168 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.072289156626506e-06, "loss": 0.9484, "step": 169 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.096385542168675e-06, "loss": 0.9692, "step": 170 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.1204819277108436e-06, "loss": 0.9031, "step": 171 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.144578313253013e-06, "loss": 0.7821, "step": 172 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.168674698795181e-06, "loss": 0.8158, "step": 173 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.19277108433735e-06, "loss": 0.8071, "step": 174 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.216867469879519e-06, "loss": 0.7466, "step": 175 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.240963855421687e-06, "loss": 0.8547, "step": 176 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.2650602409638555e-06, "loss": 0.7527, "step": 177 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.289156626506025e-06, "loss": 0.8, "step": 178 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.313253012048193e-06, "loss": 0.9808, "step": 179 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.337349397590362e-06, "loss": 0.8911, "step": 180 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.361445783132531e-06, "loss": 0.8672, "step": 181 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.385542168674699e-06, "loss": 0.8647, "step": 182 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.4096385542168675e-06, "loss": 0.9375, "step": 183 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.433734939759037e-06, "loss": 0.8317, "step": 184 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.457831325301205e-06, "loss": 0.9315, "step": 185 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.481927710843374e-06, "loss": 0.8332, "step": 186 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.506024096385542e-06, "loss": 0.7694, "step": 187 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.530120481927711e-06, "loss": 0.8893, "step": 188 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.55421686746988e-06, "loss": 0.9372, "step": 189 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.578313253012049e-06, "loss": 0.8354, "step": 190 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.602409638554217e-06, "loss": 0.8252, "step": 191 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.626506024096386e-06, "loss": 0.7327, "step": 192 }, { "epoch": 0.03, "grad_norm": 0.0, "learning_rate": 4.650602409638554e-06, "loss": 0.7954, "step": 193 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.674698795180723e-06, "loss": 0.6656, "step": 194 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.698795180722892e-06, "loss": 0.9385, "step": 195 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.7228915662650606e-06, "loss": 0.7503, "step": 196 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.74698795180723e-06, "loss": 0.9829, "step": 197 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.771084337349398e-06, "loss": 0.8484, "step": 198 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.795180722891566e-06, "loss": 0.7583, "step": 199 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.819277108433735e-06, "loss": 0.7687, "step": 200 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.843373493975904e-06, "loss": 0.7858, "step": 201 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.8674698795180725e-06, "loss": 0.8416, "step": 202 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.891566265060242e-06, "loss": 0.8782, "step": 203 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.91566265060241e-06, "loss": 0.818, "step": 204 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.939759036144578e-06, "loss": 0.8386, "step": 205 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.963855421686747e-06, "loss": 0.8056, "step": 206 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 4.987951807228916e-06, "loss": 0.892, "step": 207 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.012048192771085e-06, "loss": 0.8621, "step": 208 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.036144578313254e-06, "loss": 0.9359, "step": 209 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.060240963855422e-06, "loss": 0.8821, "step": 210 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.084337349397591e-06, "loss": 0.7707, "step": 211 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.108433734939759e-06, "loss": 0.9357, "step": 212 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.132530120481927e-06, "loss": 0.7533, "step": 213 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.156626506024097e-06, "loss": 0.6753, "step": 214 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.180722891566266e-06, "loss": 0.958, "step": 215 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.204819277108434e-06, "loss": 0.9483, "step": 216 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.228915662650603e-06, "loss": 0.8358, "step": 217 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.253012048192771e-06, "loss": 1.0156, "step": 218 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.27710843373494e-06, "loss": 0.6741, "step": 219 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.301204819277109e-06, "loss": 0.8274, "step": 220 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.325301204819278e-06, "loss": 1.0197, "step": 221 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.349397590361446e-06, "loss": 0.8142, "step": 222 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.373493975903615e-06, "loss": 0.9848, "step": 223 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.397590361445784e-06, "loss": 0.8983, "step": 224 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.421686746987952e-06, "loss": 0.749, "step": 225 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.445783132530121e-06, "loss": 0.7921, "step": 226 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.4698795180722896e-06, "loss": 0.8319, "step": 227 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.493975903614458e-06, "loss": 0.7893, "step": 228 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.518072289156628e-06, "loss": 0.7996, "step": 229 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.542168674698796e-06, "loss": 0.8774, "step": 230 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.566265060240964e-06, "loss": 0.7888, "step": 231 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.590361445783133e-06, "loss": 0.8894, "step": 232 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.6144578313253015e-06, "loss": 0.9809, "step": 233 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.63855421686747e-06, "loss": 0.8376, "step": 234 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.66265060240964e-06, "loss": 0.9573, "step": 235 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.686746987951808e-06, "loss": 0.9349, "step": 236 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.710843373493976e-06, "loss": 0.7907, "step": 237 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.734939759036145e-06, "loss": 0.7478, "step": 238 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.7590361445783135e-06, "loss": 0.8903, "step": 239 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.783132530120482e-06, "loss": 0.8676, "step": 240 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.807228915662652e-06, "loss": 1.0163, "step": 241 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.83132530120482e-06, "loss": 0.9657, "step": 242 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.855421686746988e-06, "loss": 0.8861, "step": 243 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.879518072289157e-06, "loss": 0.9508, "step": 244 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.9036144578313255e-06, "loss": 0.8812, "step": 245 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.927710843373495e-06, "loss": 0.8519, "step": 246 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.951807228915664e-06, "loss": 0.7461, "step": 247 }, { "epoch": 0.04, "grad_norm": 0.0, "learning_rate": 5.975903614457832e-06, "loss": 0.8162, "step": 248 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6e-06, "loss": 0.7933, "step": 249 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.02409638554217e-06, "loss": 1.0306, "step": 250 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.048192771084338e-06, "loss": 0.8492, "step": 251 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.0722891566265066e-06, "loss": 0.8744, "step": 252 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.096385542168676e-06, "loss": 0.8512, "step": 253 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.120481927710844e-06, "loss": 0.7367, "step": 254 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.144578313253012e-06, "loss": 1.0003, "step": 255 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.168674698795182e-06, "loss": 0.8187, "step": 256 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.19277108433735e-06, "loss": 0.9438, "step": 257 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.2168674698795185e-06, "loss": 0.8383, "step": 258 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.240963855421688e-06, "loss": 0.8883, "step": 259 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.265060240963856e-06, "loss": 0.9283, "step": 260 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.289156626506024e-06, "loss": 0.9108, "step": 261 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.313253012048192e-06, "loss": 0.9423, "step": 262 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.337349397590362e-06, "loss": 0.8678, "step": 263 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.3614457831325305e-06, "loss": 0.8349, "step": 264 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.385542168674699e-06, "loss": 0.8778, "step": 265 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.409638554216868e-06, "loss": 0.8455, "step": 266 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.433734939759036e-06, "loss": 0.8178, "step": 267 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.457831325301205e-06, "loss": 0.9463, "step": 268 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.481927710843374e-06, "loss": 0.9337, "step": 269 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.5060240963855425e-06, "loss": 1.0041, "step": 270 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.530120481927711e-06, "loss": 0.8572, "step": 271 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.554216867469881e-06, "loss": 0.8111, "step": 272 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.578313253012049e-06, "loss": 0.9356, "step": 273 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.602409638554217e-06, "loss": 0.9846, "step": 274 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.626506024096386e-06, "loss": 0.8376, "step": 275 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.6506024096385545e-06, "loss": 0.9458, "step": 276 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.674698795180723e-06, "loss": 0.8227, "step": 277 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.698795180722893e-06, "loss": 0.8492, "step": 278 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.722891566265061e-06, "loss": 0.6811, "step": 279 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.746987951807229e-06, "loss": 0.8488, "step": 280 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.771084337349398e-06, "loss": 0.7919, "step": 281 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.7951807228915665e-06, "loss": 0.8318, "step": 282 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.819277108433735e-06, "loss": 0.7967, "step": 283 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.843373493975905e-06, "loss": 0.8105, "step": 284 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.867469879518073e-06, "loss": 0.983, "step": 285 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.891566265060241e-06, "loss": 0.8217, "step": 286 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.91566265060241e-06, "loss": 0.7533, "step": 287 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.9397590361445784e-06, "loss": 0.9275, "step": 288 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.963855421686747e-06, "loss": 0.7844, "step": 289 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 6.987951807228917e-06, "loss": 0.9417, "step": 290 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.012048192771085e-06, "loss": 0.8278, "step": 291 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.036144578313253e-06, "loss": 0.9332, "step": 292 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.060240963855422e-06, "loss": 0.8902, "step": 293 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.084337349397591e-06, "loss": 0.8338, "step": 294 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.1084337349397595e-06, "loss": 0.8052, "step": 295 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.132530120481929e-06, "loss": 0.9495, "step": 296 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.156626506024097e-06, "loss": 0.9341, "step": 297 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.180722891566265e-06, "loss": 0.8941, "step": 298 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.204819277108435e-06, "loss": 0.9085, "step": 299 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.228915662650603e-06, "loss": 0.8251, "step": 300 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.2530120481927715e-06, "loss": 0.7215, "step": 301 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.277108433734941e-06, "loss": 0.8896, "step": 302 }, { "epoch": 0.05, "grad_norm": 0.0, "learning_rate": 7.301204819277109e-06, "loss": 0.9505, "step": 303 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.325301204819277e-06, "loss": 0.8226, "step": 304 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.349397590361447e-06, "loss": 0.8341, "step": 305 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.373493975903615e-06, "loss": 0.8791, "step": 306 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.3975903614457835e-06, "loss": 0.8399, "step": 307 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.4216867469879526e-06, "loss": 0.8395, "step": 308 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.445783132530121e-06, "loss": 0.9447, "step": 309 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.469879518072289e-06, "loss": 0.8915, "step": 310 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.493975903614459e-06, "loss": 0.7816, "step": 311 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.518072289156627e-06, "loss": 0.9138, "step": 312 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.5421686746987955e-06, "loss": 0.9177, "step": 313 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.5662650602409645e-06, "loss": 0.9939, "step": 314 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.590361445783133e-06, "loss": 0.9234, "step": 315 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.614457831325302e-06, "loss": 0.798, "step": 316 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.638554216867471e-06, "loss": 0.7946, "step": 317 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.66265060240964e-06, "loss": 0.9554, "step": 318 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.686746987951807e-06, "loss": 0.815, "step": 319 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.710843373493977e-06, "loss": 0.9267, "step": 320 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.734939759036146e-06, "loss": 0.8572, "step": 321 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.759036144578314e-06, "loss": 0.8853, "step": 322 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.783132530120484e-06, "loss": 0.9556, "step": 323 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.807228915662652e-06, "loss": 0.9225, "step": 324 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.83132530120482e-06, "loss": 0.8139, "step": 325 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.855421686746989e-06, "loss": 0.9148, "step": 326 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.879518072289157e-06, "loss": 0.7945, "step": 327 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.903614457831325e-06, "loss": 1.0339, "step": 328 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.927710843373495e-06, "loss": 0.8982, "step": 329 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.951807228915663e-06, "loss": 0.8012, "step": 330 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 7.975903614457831e-06, "loss": 0.8088, "step": 331 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 0.8111, "step": 332 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.02409638554217e-06, "loss": 0.9313, "step": 333 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.048192771084338e-06, "loss": 0.8214, "step": 334 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.072289156626508e-06, "loss": 0.7928, "step": 335 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.096385542168676e-06, "loss": 0.7952, "step": 336 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.120481927710844e-06, "loss": 0.776, "step": 337 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.144578313253012e-06, "loss": 0.8453, "step": 338 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.16867469879518e-06, "loss": 0.8433, "step": 339 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.19277108433735e-06, "loss": 0.7948, "step": 340 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.216867469879519e-06, "loss": 0.9881, "step": 341 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.240963855421687e-06, "loss": 0.8797, "step": 342 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.265060240963855e-06, "loss": 0.8045, "step": 343 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.289156626506025e-06, "loss": 0.8817, "step": 344 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.313253012048194e-06, "loss": 0.8319, "step": 345 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.337349397590362e-06, "loss": 0.7341, "step": 346 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.361445783132532e-06, "loss": 0.8384, "step": 347 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.3855421686747e-06, "loss": 0.8642, "step": 348 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.409638554216868e-06, "loss": 0.9419, "step": 349 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.433734939759038e-06, "loss": 0.7104, "step": 350 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.457831325301206e-06, "loss": 0.9237, "step": 351 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.481927710843375e-06, "loss": 0.7796, "step": 352 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.506024096385543e-06, "loss": 0.9206, "step": 353 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.530120481927711e-06, "loss": 0.864, "step": 354 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.55421686746988e-06, "loss": 0.8835, "step": 355 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.57831325301205e-06, "loss": 0.7904, "step": 356 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.602409638554217e-06, "loss": 1.0347, "step": 357 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.626506024096386e-06, "loss": 0.8234, "step": 358 }, { "epoch": 0.06, "grad_norm": 0.0, "learning_rate": 8.650602409638556e-06, "loss": 0.8887, "step": 359 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.674698795180724e-06, "loss": 0.8904, "step": 360 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.698795180722892e-06, "loss": 0.8077, "step": 361 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.722891566265062e-06, "loss": 0.6606, "step": 362 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.74698795180723e-06, "loss": 0.7288, "step": 363 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.771084337349399e-06, "loss": 0.7692, "step": 364 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.795180722891567e-06, "loss": 0.7706, "step": 365 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.819277108433735e-06, "loss": 0.8322, "step": 366 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.843373493975905e-06, "loss": 0.7608, "step": 367 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.867469879518073e-06, "loss": 1.0052, "step": 368 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.891566265060241e-06, "loss": 0.8318, "step": 369 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.91566265060241e-06, "loss": 0.8936, "step": 370 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.939759036144578e-06, "loss": 0.8371, "step": 371 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.963855421686748e-06, "loss": 0.8295, "step": 372 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 8.987951807228916e-06, "loss": 0.8446, "step": 373 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.012048192771084e-06, "loss": 0.7988, "step": 374 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.036144578313254e-06, "loss": 0.9395, "step": 375 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.060240963855423e-06, "loss": 0.9361, "step": 376 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.08433734939759e-06, "loss": 0.786, "step": 377 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.10843373493976e-06, "loss": 0.8329, "step": 378 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.132530120481929e-06, "loss": 0.8536, "step": 379 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.156626506024097e-06, "loss": 0.9151, "step": 380 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.180722891566265e-06, "loss": 0.8811, "step": 381 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.204819277108434e-06, "loss": 0.9562, "step": 382 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.228915662650602e-06, "loss": 0.7978, "step": 383 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.253012048192772e-06, "loss": 0.9651, "step": 384 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.27710843373494e-06, "loss": 0.8507, "step": 385 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.301204819277108e-06, "loss": 0.9082, "step": 386 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.325301204819278e-06, "loss": 0.9593, "step": 387 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.349397590361446e-06, "loss": 0.8886, "step": 388 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.373493975903615e-06, "loss": 0.8763, "step": 389 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.397590361445785e-06, "loss": 0.7967, "step": 390 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.421686746987953e-06, "loss": 0.819, "step": 391 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.445783132530121e-06, "loss": 0.8115, "step": 392 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.46987951807229e-06, "loss": 0.8141, "step": 393 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.49397590361446e-06, "loss": 0.924, "step": 394 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.518072289156628e-06, "loss": 0.8449, "step": 395 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.542168674698796e-06, "loss": 0.7596, "step": 396 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.566265060240964e-06, "loss": 0.7114, "step": 397 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.590361445783132e-06, "loss": 0.8765, "step": 398 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.614457831325302e-06, "loss": 0.9695, "step": 399 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.63855421686747e-06, "loss": 0.8049, "step": 400 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.662650602409639e-06, "loss": 0.8108, "step": 401 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.686746987951809e-06, "loss": 0.8972, "step": 402 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.710843373493977e-06, "loss": 0.8609, "step": 403 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.734939759036145e-06, "loss": 0.9394, "step": 404 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.759036144578315e-06, "loss": 0.8651, "step": 405 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.783132530120483e-06, "loss": 0.8566, "step": 406 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.807228915662652e-06, "loss": 0.8763, "step": 407 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.83132530120482e-06, "loss": 0.7841, "step": 408 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.855421686746988e-06, "loss": 0.8023, "step": 409 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.879518072289156e-06, "loss": 0.8319, "step": 410 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.903614457831326e-06, "loss": 0.9138, "step": 411 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.927710843373494e-06, "loss": 0.8246, "step": 412 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.951807228915663e-06, "loss": 0.8607, "step": 413 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 9.975903614457833e-06, "loss": 0.7961, "step": 414 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.7563, "step": 415 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.002409638554217e-05, "loss": 0.9983, "step": 416 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0048192771084337e-05, "loss": 0.8444, "step": 417 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0072289156626507e-05, "loss": 0.7738, "step": 418 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0096385542168675e-05, "loss": 0.8988, "step": 419 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0120481927710844e-05, "loss": 0.7212, "step": 420 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0144578313253014e-05, "loss": 0.7992, "step": 421 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0168674698795182e-05, "loss": 0.8277, "step": 422 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.019277108433735e-05, "loss": 0.8166, "step": 423 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0216867469879518e-05, "loss": 0.8849, "step": 424 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0240963855421688e-05, "loss": 0.8111, "step": 425 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0265060240963855e-05, "loss": 0.9576, "step": 426 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0289156626506025e-05, "loss": 0.8541, "step": 427 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0313253012048195e-05, "loss": 0.6968, "step": 428 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0337349397590361e-05, "loss": 0.82, "step": 429 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0361445783132531e-05, "loss": 0.837, "step": 430 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0385542168674701e-05, "loss": 0.7582, "step": 431 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0409638554216868e-05, "loss": 0.8618, "step": 432 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0433734939759038e-05, "loss": 0.8822, "step": 433 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0457831325301206e-05, "loss": 0.8791, "step": 434 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0481927710843374e-05, "loss": 0.825, "step": 435 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0506024096385542e-05, "loss": 0.8543, "step": 436 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0530120481927712e-05, "loss": 0.8001, "step": 437 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.055421686746988e-05, "loss": 0.9488, "step": 438 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0578313253012049e-05, "loss": 0.7605, "step": 439 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0602409638554219e-05, "loss": 0.786, "step": 440 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0626506024096385e-05, "loss": 0.7246, "step": 441 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0650602409638555e-05, "loss": 0.8365, "step": 442 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0674698795180725e-05, "loss": 1.0288, "step": 443 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0698795180722892e-05, "loss": 0.8278, "step": 444 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0722891566265062e-05, "loss": 0.8522, "step": 445 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.074698795180723e-05, "loss": 0.914, "step": 446 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0771084337349398e-05, "loss": 1.1814, "step": 447 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0795180722891568e-05, "loss": 0.9307, "step": 448 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0819277108433736e-05, "loss": 0.8121, "step": 449 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0843373493975904e-05, "loss": 0.8864, "step": 450 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0867469879518073e-05, "loss": 0.7653, "step": 451 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0891566265060243e-05, "loss": 0.936, "step": 452 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.091566265060241e-05, "loss": 0.9926, "step": 453 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0939759036144579e-05, "loss": 0.8976, "step": 454 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0963855421686749e-05, "loss": 0.7807, "step": 455 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.0987951807228916e-05, "loss": 0.9684, "step": 456 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1012048192771086e-05, "loss": 0.8849, "step": 457 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1036144578313255e-05, "loss": 0.8115, "step": 458 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1060240963855422e-05, "loss": 1.0029, "step": 459 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1084337349397592e-05, "loss": 0.8887, "step": 460 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.110843373493976e-05, "loss": 0.7656, "step": 461 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1132530120481928e-05, "loss": 0.7961, "step": 462 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1156626506024097e-05, "loss": 0.8135, "step": 463 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1180722891566267e-05, "loss": 0.8993, "step": 464 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1204819277108435e-05, "loss": 0.7327, "step": 465 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1228915662650603e-05, "loss": 0.7754, "step": 466 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.1253012048192773e-05, "loss": 0.8759, "step": 467 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.127710843373494e-05, "loss": 0.7516, "step": 468 }, { "epoch": 0.08, "grad_norm": 0.0, "learning_rate": 1.130120481927711e-05, "loss": 0.8196, "step": 469 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.132530120481928e-05, "loss": 0.8283, "step": 470 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1349397590361446e-05, "loss": 0.8446, "step": 471 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1373493975903616e-05, "loss": 0.8411, "step": 472 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1397590361445786e-05, "loss": 0.8355, "step": 473 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1421686746987952e-05, "loss": 0.7617, "step": 474 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1445783132530122e-05, "loss": 0.9067, "step": 475 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.146987951807229e-05, "loss": 0.8962, "step": 476 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1493975903614459e-05, "loss": 0.8859, "step": 477 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1518072289156627e-05, "loss": 0.847, "step": 478 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1542168674698797e-05, "loss": 0.7521, "step": 479 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1566265060240964e-05, "loss": 0.7441, "step": 480 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1590361445783133e-05, "loss": 0.8607, "step": 481 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1614457831325303e-05, "loss": 0.7474, "step": 482 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.163855421686747e-05, "loss": 0.8312, "step": 483 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.166265060240964e-05, "loss": 0.8511, "step": 484 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.168674698795181e-05, "loss": 0.7856, "step": 485 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1710843373493976e-05, "loss": 0.7838, "step": 486 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1734939759036146e-05, "loss": 1.0071, "step": 487 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1759036144578315e-05, "loss": 0.8415, "step": 488 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1783132530120483e-05, "loss": 0.8603, "step": 489 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1807228915662651e-05, "loss": 0.8542, "step": 490 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1831325301204821e-05, "loss": 0.8594, "step": 491 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.185542168674699e-05, "loss": 0.8449, "step": 492 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1879518072289157e-05, "loss": 0.7871, "step": 493 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1903614457831327e-05, "loss": 0.8376, "step": 494 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1927710843373494e-05, "loss": 0.8424, "step": 495 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1951807228915664e-05, "loss": 0.9506, "step": 496 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.1975903614457834e-05, "loss": 0.9134, "step": 497 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2e-05, "loss": 0.8681, "step": 498 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.202409638554217e-05, "loss": 0.7251, "step": 499 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.204819277108434e-05, "loss": 0.9102, "step": 500 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2072289156626507e-05, "loss": 0.7881, "step": 501 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2096385542168677e-05, "loss": 0.8971, "step": 502 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2120481927710845e-05, "loss": 0.8627, "step": 503 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2144578313253013e-05, "loss": 0.8287, "step": 504 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2168674698795181e-05, "loss": 0.8867, "step": 505 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2192771084337351e-05, "loss": 0.8633, "step": 506 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2216867469879518e-05, "loss": 0.9058, "step": 507 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2240963855421688e-05, "loss": 0.7838, "step": 508 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2265060240963858e-05, "loss": 0.8834, "step": 509 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2289156626506024e-05, "loss": 0.8071, "step": 510 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2313253012048194e-05, "loss": 0.93, "step": 511 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2337349397590364e-05, "loss": 0.8267, "step": 512 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.236144578313253e-05, "loss": 0.8023, "step": 513 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.23855421686747e-05, "loss": 0.9028, "step": 514 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2409638554216869e-05, "loss": 0.8454, "step": 515 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2433734939759037e-05, "loss": 0.8221, "step": 516 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2457831325301207e-05, "loss": 0.9282, "step": 517 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2481927710843375e-05, "loss": 0.8323, "step": 518 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2506024096385544e-05, "loss": 0.8791, "step": 519 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2530120481927712e-05, "loss": 0.8623, "step": 520 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.255421686746988e-05, "loss": 0.7571, "step": 521 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2578313253012048e-05, "loss": 0.8671, "step": 522 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2602409638554218e-05, "loss": 0.799, "step": 523 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2626506024096385e-05, "loss": 0.8893, "step": 524 }, { "epoch": 0.09, "grad_norm": 0.0, "learning_rate": 1.2650602409638555e-05, "loss": 0.7486, "step": 525 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2674698795180725e-05, "loss": 0.8452, "step": 526 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2698795180722891e-05, "loss": 0.9417, "step": 527 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2722891566265061e-05, "loss": 0.8736, "step": 528 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2746987951807231e-05, "loss": 0.8509, "step": 529 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2771084337349398e-05, "loss": 0.8012, "step": 530 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2795180722891567e-05, "loss": 0.9377, "step": 531 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2819277108433736e-05, "loss": 0.8622, "step": 532 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2843373493975904e-05, "loss": 0.8789, "step": 533 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2867469879518072e-05, "loss": 0.8014, "step": 534 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2891566265060242e-05, "loss": 0.8316, "step": 535 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.291566265060241e-05, "loss": 0.8038, "step": 536 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2939759036144579e-05, "loss": 0.7242, "step": 537 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2963855421686749e-05, "loss": 0.8131, "step": 538 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.2987951807228915e-05, "loss": 0.8081, "step": 539 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3012048192771085e-05, "loss": 0.9304, "step": 540 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3036144578313255e-05, "loss": 0.794, "step": 541 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3060240963855421e-05, "loss": 0.8852, "step": 542 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3084337349397591e-05, "loss": 0.8864, "step": 543 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3108433734939761e-05, "loss": 0.7802, "step": 544 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3132530120481928e-05, "loss": 0.8118, "step": 545 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3156626506024098e-05, "loss": 0.8076, "step": 546 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3180722891566266e-05, "loss": 0.8908, "step": 547 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3204819277108434e-05, "loss": 0.8483, "step": 548 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3228915662650603e-05, "loss": 0.8307, "step": 549 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3253012048192772e-05, "loss": 0.8604, "step": 550 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3277108433734939e-05, "loss": 0.8779, "step": 551 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3301204819277109e-05, "loss": 0.8029, "step": 552 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3325301204819279e-05, "loss": 0.868, "step": 553 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3349397590361445e-05, "loss": 0.7608, "step": 554 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3373493975903615e-05, "loss": 0.9113, "step": 555 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3397590361445785e-05, "loss": 0.8503, "step": 556 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3421686746987952e-05, "loss": 0.8543, "step": 557 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3445783132530122e-05, "loss": 0.8989, "step": 558 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.346987951807229e-05, "loss": 0.7899, "step": 559 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3493975903614458e-05, "loss": 0.9776, "step": 560 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3518072289156628e-05, "loss": 0.7025, "step": 561 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3542168674698796e-05, "loss": 0.769, "step": 562 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3566265060240965e-05, "loss": 0.9746, "step": 563 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3590361445783133e-05, "loss": 0.9438, "step": 564 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3614457831325303e-05, "loss": 0.8509, "step": 565 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.363855421686747e-05, "loss": 1.0924, "step": 566 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.366265060240964e-05, "loss": 0.8889, "step": 567 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.368674698795181e-05, "loss": 1.0002, "step": 568 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3710843373493976e-05, "loss": 0.8455, "step": 569 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3734939759036146e-05, "loss": 0.8976, "step": 570 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3759036144578316e-05, "loss": 0.7984, "step": 571 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3783132530120482e-05, "loss": 0.8466, "step": 572 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3807228915662652e-05, "loss": 0.6593, "step": 573 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.383132530120482e-05, "loss": 0.8727, "step": 574 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3855421686746989e-05, "loss": 0.8833, "step": 575 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3879518072289157e-05, "loss": 0.9065, "step": 576 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3903614457831327e-05, "loss": 0.8812, "step": 577 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3927710843373493e-05, "loss": 0.861, "step": 578 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3951807228915663e-05, "loss": 0.8173, "step": 579 }, { "epoch": 0.1, "grad_norm": 0.0, "learning_rate": 1.3975903614457833e-05, "loss": 0.9413, "step": 580 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4e-05, "loss": 0.8126, "step": 581 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.402409638554217e-05, "loss": 0.9758, "step": 582 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.404819277108434e-05, "loss": 0.8835, "step": 583 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4072289156626506e-05, "loss": 0.8417, "step": 584 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4096385542168676e-05, "loss": 0.9071, "step": 585 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4120481927710844e-05, "loss": 0.8677, "step": 586 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4144578313253013e-05, "loss": 0.8608, "step": 587 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4168674698795183e-05, "loss": 0.931, "step": 588 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.419277108433735e-05, "loss": 0.8535, "step": 589 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4216867469879519e-05, "loss": 0.7789, "step": 590 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4240963855421687e-05, "loss": 0.8881, "step": 591 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4265060240963857e-05, "loss": 0.7454, "step": 592 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4289156626506024e-05, "loss": 0.8539, "step": 593 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4313253012048194e-05, "loss": 0.9382, "step": 594 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4337349397590364e-05, "loss": 0.9057, "step": 595 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.436144578313253e-05, "loss": 0.8623, "step": 596 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.43855421686747e-05, "loss": 0.6726, "step": 597 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.440963855421687e-05, "loss": 0.8441, "step": 598 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4433734939759037e-05, "loss": 0.7323, "step": 599 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4457831325301207e-05, "loss": 0.8969, "step": 600 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4481927710843375e-05, "loss": 0.8202, "step": 601 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4506024096385543e-05, "loss": 0.7611, "step": 602 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4530120481927711e-05, "loss": 0.9939, "step": 603 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4554216867469881e-05, "loss": 0.822, "step": 604 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.457831325301205e-05, "loss": 0.6766, "step": 605 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4602409638554218e-05, "loss": 0.7999, "step": 606 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4626506024096388e-05, "loss": 0.9786, "step": 607 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4650602409638554e-05, "loss": 0.7723, "step": 608 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4674698795180724e-05, "loss": 0.6596, "step": 609 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4698795180722894e-05, "loss": 0.9159, "step": 610 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.472289156626506e-05, "loss": 0.9584, "step": 611 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.474698795180723e-05, "loss": 0.7351, "step": 612 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4771084337349399e-05, "loss": 0.8398, "step": 613 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4795180722891567e-05, "loss": 0.9796, "step": 614 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4819277108433737e-05, "loss": 0.8181, "step": 615 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4843373493975905e-05, "loss": 0.755, "step": 616 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4867469879518073e-05, "loss": 0.9854, "step": 617 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4891566265060242e-05, "loss": 0.7575, "step": 618 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4915662650602412e-05, "loss": 0.8028, "step": 619 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4939759036144578e-05, "loss": 0.8274, "step": 620 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4963855421686748e-05, "loss": 0.8674, "step": 621 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.4987951807228918e-05, "loss": 0.9235, "step": 622 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5012048192771084e-05, "loss": 0.7971, "step": 623 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5036144578313254e-05, "loss": 0.7705, "step": 624 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5060240963855424e-05, "loss": 0.9047, "step": 625 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5084337349397591e-05, "loss": 0.7812, "step": 626 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5108433734939761e-05, "loss": 0.8579, "step": 627 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5132530120481929e-05, "loss": 0.8601, "step": 628 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5156626506024097e-05, "loss": 0.9837, "step": 629 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5180722891566266e-05, "loss": 0.9944, "step": 630 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5204819277108436e-05, "loss": 0.9109, "step": 631 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5228915662650604e-05, "loss": 0.7928, "step": 632 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5253012048192772e-05, "loss": 0.8958, "step": 633 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.5277108433734942e-05, "loss": 0.7973, "step": 634 }, { "epoch": 0.11, "grad_norm": 0.0, "learning_rate": 1.530120481927711e-05, "loss": 0.7079, "step": 635 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.532530120481928e-05, "loss": 0.9375, "step": 636 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5349397590361447e-05, "loss": 0.9802, "step": 637 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5373493975903615e-05, "loss": 0.8597, "step": 638 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5397590361445783e-05, "loss": 0.7592, "step": 639 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5421686746987955e-05, "loss": 0.8096, "step": 640 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.544578313253012e-05, "loss": 0.7419, "step": 641 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.546987951807229e-05, "loss": 1.0459, "step": 642 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.549397590361446e-05, "loss": 0.7785, "step": 643 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5518072289156628e-05, "loss": 0.9085, "step": 644 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5542168674698796e-05, "loss": 0.8601, "step": 645 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5566265060240968e-05, "loss": 0.8755, "step": 646 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5590361445783132e-05, "loss": 0.9791, "step": 647 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5614457831325304e-05, "loss": 0.7793, "step": 648 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5638554216867472e-05, "loss": 0.9662, "step": 649 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.566265060240964e-05, "loss": 0.8003, "step": 650 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.568674698795181e-05, "loss": 0.758, "step": 651 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5710843373493977e-05, "loss": 1.0591, "step": 652 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5734939759036145e-05, "loss": 0.8274, "step": 653 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5759036144578313e-05, "loss": 0.8985, "step": 654 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5783132530120485e-05, "loss": 0.9405, "step": 655 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.580722891566265e-05, "loss": 0.9282, "step": 656 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.583132530120482e-05, "loss": 0.801, "step": 657 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.585542168674699e-05, "loss": 0.8932, "step": 658 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5879518072289158e-05, "loss": 0.9257, "step": 659 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5903614457831326e-05, "loss": 0.7884, "step": 660 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5927710843373495e-05, "loss": 1.0141, "step": 661 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5951807228915663e-05, "loss": 1.0272, "step": 662 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.5975903614457834e-05, "loss": 0.8473, "step": 663 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.7634, "step": 664 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.602409638554217e-05, "loss": 0.9654, "step": 665 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.604819277108434e-05, "loss": 0.8309, "step": 666 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6072289156626507e-05, "loss": 0.9582, "step": 667 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6096385542168676e-05, "loss": 0.8878, "step": 668 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6120481927710844e-05, "loss": 0.8225, "step": 669 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6144578313253015e-05, "loss": 0.832, "step": 670 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.616867469879518e-05, "loss": 0.9499, "step": 671 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6192771084337352e-05, "loss": 0.834, "step": 672 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.621686746987952e-05, "loss": 0.831, "step": 673 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.624096385542169e-05, "loss": 0.8661, "step": 674 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6265060240963857e-05, "loss": 0.8466, "step": 675 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6289156626506025e-05, "loss": 0.8928, "step": 676 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6313253012048193e-05, "loss": 0.9115, "step": 677 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.633734939759036e-05, "loss": 0.9855, "step": 678 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6361445783132533e-05, "loss": 0.815, "step": 679 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.63855421686747e-05, "loss": 0.9348, "step": 680 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.640963855421687e-05, "loss": 0.861, "step": 681 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6433734939759038e-05, "loss": 0.8358, "step": 682 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6457831325301206e-05, "loss": 1.0867, "step": 683 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6481927710843374e-05, "loss": 0.6373, "step": 684 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6506024096385546e-05, "loss": 1.0087, "step": 685 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.653012048192771e-05, "loss": 0.8495, "step": 686 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6554216867469882e-05, "loss": 0.7396, "step": 687 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.657831325301205e-05, "loss": 0.798, "step": 688 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.660240963855422e-05, "loss": 0.8467, "step": 689 }, { "epoch": 0.12, "grad_norm": 0.0, "learning_rate": 1.6626506024096387e-05, "loss": 0.9586, "step": 690 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6650602409638555e-05, "loss": 0.9936, "step": 691 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6674698795180724e-05, "loss": 0.7605, "step": 692 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6698795180722892e-05, "loss": 0.7684, "step": 693 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6722891566265063e-05, "loss": 0.9792, "step": 694 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6746987951807228e-05, "loss": 0.9256, "step": 695 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.67710843373494e-05, "loss": 0.9316, "step": 696 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6795180722891568e-05, "loss": 0.7979, "step": 697 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6819277108433736e-05, "loss": 0.8741, "step": 698 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6843373493975905e-05, "loss": 0.8998, "step": 699 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6867469879518076e-05, "loss": 0.8389, "step": 700 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.689156626506024e-05, "loss": 0.7375, "step": 701 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6915662650602413e-05, "loss": 0.8855, "step": 702 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.693975903614458e-05, "loss": 0.827, "step": 703 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.696385542168675e-05, "loss": 0.7798, "step": 704 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.6987951807228917e-05, "loss": 0.7572, "step": 705 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7012048192771086e-05, "loss": 0.7999, "step": 706 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7036144578313254e-05, "loss": 0.8334, "step": 707 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7060240963855422e-05, "loss": 0.9893, "step": 708 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7084337349397594e-05, "loss": 0.8817, "step": 709 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.710843373493976e-05, "loss": 0.7139, "step": 710 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.713253012048193e-05, "loss": 0.9197, "step": 711 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.71566265060241e-05, "loss": 0.9103, "step": 712 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7180722891566267e-05, "loss": 0.9862, "step": 713 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7204819277108435e-05, "loss": 0.8449, "step": 714 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7228915662650603e-05, "loss": 0.9471, "step": 715 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.725301204819277e-05, "loss": 0.9287, "step": 716 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7277108433734943e-05, "loss": 1.0588, "step": 717 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.730120481927711e-05, "loss": 0.7861, "step": 718 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.732530120481928e-05, "loss": 1.0188, "step": 719 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7349397590361448e-05, "loss": 0.7689, "step": 720 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7373493975903616e-05, "loss": 0.9644, "step": 721 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7397590361445784e-05, "loss": 0.9549, "step": 722 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7421686746987953e-05, "loss": 0.9066, "step": 723 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7445783132530124e-05, "loss": 0.7933, "step": 724 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.746987951807229e-05, "loss": 0.8386, "step": 725 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.749397590361446e-05, "loss": 0.9576, "step": 726 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7518072289156625e-05, "loss": 0.8234, "step": 727 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7542168674698797e-05, "loss": 0.8378, "step": 728 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7566265060240965e-05, "loss": 0.8597, "step": 729 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7590361445783134e-05, "loss": 0.9801, "step": 730 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7614457831325302e-05, "loss": 0.9092, "step": 731 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.763855421686747e-05, "loss": 0.7851, "step": 732 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.766265060240964e-05, "loss": 0.7855, "step": 733 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.768674698795181e-05, "loss": 0.9781, "step": 734 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7710843373493978e-05, "loss": 0.9778, "step": 735 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7734939759036146e-05, "loss": 0.7996, "step": 736 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7759036144578315e-05, "loss": 0.9467, "step": 737 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7783132530120483e-05, "loss": 0.9725, "step": 738 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.780722891566265e-05, "loss": 0.8942, "step": 739 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.783132530120482e-05, "loss": 0.8316, "step": 740 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.785542168674699e-05, "loss": 0.8433, "step": 741 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7879518072289156e-05, "loss": 0.6718, "step": 742 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7903614457831327e-05, "loss": 1.0013, "step": 743 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7927710843373496e-05, "loss": 0.8921, "step": 744 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7951807228915664e-05, "loss": 0.8882, "step": 745 }, { "epoch": 0.13, "grad_norm": 0.0, "learning_rate": 1.7975903614457832e-05, "loss": 0.9192, "step": 746 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8e-05, "loss": 0.7834, "step": 747 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.802409638554217e-05, "loss": 0.873, "step": 748 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8048192771084337e-05, "loss": 0.7706, "step": 749 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.807228915662651e-05, "loss": 0.907, "step": 750 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8096385542168677e-05, "loss": 0.7303, "step": 751 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8120481927710845e-05, "loss": 0.7942, "step": 752 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8144578313253013e-05, "loss": 0.9019, "step": 753 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.816867469879518e-05, "loss": 0.7818, "step": 754 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.819277108433735e-05, "loss": 1.0111, "step": 755 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.821686746987952e-05, "loss": 0.8021, "step": 756 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8240963855421686e-05, "loss": 0.9005, "step": 757 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8265060240963858e-05, "loss": 0.7481, "step": 758 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8289156626506026e-05, "loss": 0.9061, "step": 759 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8313253012048194e-05, "loss": 0.9498, "step": 760 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8337349397590363e-05, "loss": 1.016, "step": 761 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.836144578313253e-05, "loss": 0.9296, "step": 762 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.83855421686747e-05, "loss": 0.7885, "step": 763 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8409638554216867e-05, "loss": 0.8405, "step": 764 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.843373493975904e-05, "loss": 0.8757, "step": 765 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8457831325301204e-05, "loss": 0.828, "step": 766 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8481927710843375e-05, "loss": 0.7282, "step": 767 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8506024096385544e-05, "loss": 0.8626, "step": 768 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8530120481927712e-05, "loss": 0.7274, "step": 769 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.855421686746988e-05, "loss": 0.8006, "step": 770 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8578313253012052e-05, "loss": 0.9322, "step": 771 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8602409638554217e-05, "loss": 0.7694, "step": 772 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8626506024096388e-05, "loss": 0.7859, "step": 773 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8650602409638556e-05, "loss": 1.0279, "step": 774 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8674698795180725e-05, "loss": 0.8793, "step": 775 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8698795180722893e-05, "loss": 0.8994, "step": 776 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.872289156626506e-05, "loss": 0.906, "step": 777 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.874698795180723e-05, "loss": 0.825, "step": 778 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8771084337349398e-05, "loss": 0.8716, "step": 779 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.879518072289157e-05, "loss": 0.9188, "step": 780 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8819277108433734e-05, "loss": 0.896, "step": 781 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8843373493975906e-05, "loss": 0.8038, "step": 782 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8867469879518074e-05, "loss": 0.8157, "step": 783 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8891566265060242e-05, "loss": 0.8274, "step": 784 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.891566265060241e-05, "loss": 0.888, "step": 785 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.893975903614458e-05, "loss": 0.8686, "step": 786 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.8963855421686747e-05, "loss": 0.8559, "step": 787 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.898795180722892e-05, "loss": 0.8516, "step": 788 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9012048192771087e-05, "loss": 0.9023, "step": 789 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9036144578313255e-05, "loss": 0.8405, "step": 790 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9060240963855423e-05, "loss": 0.834, "step": 791 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.908433734939759e-05, "loss": 0.7825, "step": 792 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.910843373493976e-05, "loss": 0.8559, "step": 793 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9132530120481928e-05, "loss": 0.808, "step": 794 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.91566265060241e-05, "loss": 0.7825, "step": 795 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9180722891566265e-05, "loss": 0.7218, "step": 796 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9204819277108436e-05, "loss": 0.9176, "step": 797 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9228915662650604e-05, "loss": 0.833, "step": 798 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.9253012048192773e-05, "loss": 0.9446, "step": 799 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.927710843373494e-05, "loss": 0.8025, "step": 800 }, { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 1.930120481927711e-05, "loss": 0.9198, "step": 801 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9325301204819277e-05, "loss": 0.798, "step": 802 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9349397590361446e-05, "loss": 0.9291, "step": 803 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9373493975903617e-05, "loss": 0.8073, "step": 804 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9397590361445785e-05, "loss": 0.7787, "step": 805 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9421686746987954e-05, "loss": 0.7095, "step": 806 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9445783132530122e-05, "loss": 0.8029, "step": 807 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.946987951807229e-05, "loss": 0.8224, "step": 808 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.949397590361446e-05, "loss": 0.8857, "step": 809 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.951807228915663e-05, "loss": 0.8699, "step": 810 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9542168674698795e-05, "loss": 0.7613, "step": 811 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9566265060240967e-05, "loss": 0.8645, "step": 812 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9590361445783135e-05, "loss": 0.8591, "step": 813 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9614457831325303e-05, "loss": 0.8572, "step": 814 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.963855421686747e-05, "loss": 0.8001, "step": 815 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.966265060240964e-05, "loss": 0.7536, "step": 816 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9686746987951808e-05, "loss": 0.8471, "step": 817 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9710843373493976e-05, "loss": 0.7776, "step": 818 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9734939759036148e-05, "loss": 0.8404, "step": 819 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9759036144578312e-05, "loss": 0.7624, "step": 820 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9783132530120484e-05, "loss": 0.943, "step": 821 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9807228915662652e-05, "loss": 0.7876, "step": 822 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.983132530120482e-05, "loss": 0.9445, "step": 823 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.985542168674699e-05, "loss": 0.8263, "step": 824 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.987951807228916e-05, "loss": 0.8443, "step": 825 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9903614457831325e-05, "loss": 1.0173, "step": 826 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9927710843373497e-05, "loss": 0.8667, "step": 827 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9951807228915665e-05, "loss": 0.8793, "step": 828 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9975903614457833e-05, "loss": 0.7559, "step": 829 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 2e-05, "loss": 0.9133, "step": 830 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999999801091438e-05, "loss": 0.7576, "step": 831 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.999999920436575e-05, "loss": 0.8624, "step": 832 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.999999820982296e-05, "loss": 0.89, "step": 833 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999996817463112e-05, "loss": 1.0168, "step": 834 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.999999502728626e-05, "loss": 0.892, "step": 835 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999992839292475e-05, "loss": 0.8599, "step": 836 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999990253481844e-05, "loss": 0.8293, "step": 837 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999987269854468e-05, "loss": 0.9658, "step": 838 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.999998388841047e-05, "loss": 0.7975, "step": 839 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.999998010914998e-05, "loss": 1.016, "step": 840 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999975932073154e-05, "loss": 0.9258, "step": 841 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999971357180152e-05, "loss": 0.8212, "step": 842 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999966384471157e-05, "loss": 0.8928, "step": 843 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999961013946372e-05, "loss": 0.8358, "step": 844 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999955245606004e-05, "loss": 0.8185, "step": 845 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999949079450287e-05, "loss": 0.8012, "step": 846 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999942515479464e-05, "loss": 0.8209, "step": 847 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999935553693796e-05, "loss": 0.8053, "step": 848 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999928194093563e-05, "loss": 1.1379, "step": 849 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999920436679053e-05, "loss": 0.9637, "step": 850 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999912281450577e-05, "loss": 0.9337, "step": 851 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999903728408463e-05, "loss": 1.0323, "step": 852 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999894777553045e-05, "loss": 0.8321, "step": 853 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999885428884684e-05, "loss": 0.7341, "step": 854 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.999987568240375e-05, "loss": 0.8077, "step": 855 }, { "epoch": 0.15, "grad_norm": 0.0, "learning_rate": 1.9999865538110628e-05, "loss": 0.8542, "step": 856 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999854996005726e-05, "loss": 0.8235, "step": 857 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999844056089463e-05, "loss": 0.7565, "step": 858 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999983271836227e-05, "loss": 0.9473, "step": 859 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999820982824603e-05, "loss": 0.8708, "step": 860 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999808849476925e-05, "loss": 0.9935, "step": 861 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999979631831972e-05, "loss": 0.799, "step": 862 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999783389353488e-05, "loss": 0.9539, "step": 863 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999770062578742e-05, "loss": 0.8694, "step": 864 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999756337996013e-05, "loss": 0.7749, "step": 865 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999742215605846e-05, "loss": 0.8854, "step": 866 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999727695408803e-05, "loss": 0.8677, "step": 867 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999712777405464e-05, "loss": 0.8148, "step": 868 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999697461596415e-05, "loss": 0.8096, "step": 869 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999681747982272e-05, "loss": 0.8437, "step": 870 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999966563656366e-05, "loss": 0.9462, "step": 871 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999964912734122e-05, "loss": 0.8343, "step": 872 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999632220315606e-05, "loss": 0.8126, "step": 873 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999614915487493e-05, "loss": 0.8073, "step": 874 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999597212857566e-05, "loss": 0.9341, "step": 875 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999579112426534e-05, "loss": 0.8509, "step": 876 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999560614195114e-05, "loss": 0.8708, "step": 877 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999541718164043e-05, "loss": 0.8041, "step": 878 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999952242433407e-05, "loss": 0.9326, "step": 879 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999502732705965e-05, "loss": 0.9056, "step": 880 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999482643280515e-05, "loss": 0.9275, "step": 881 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999462156058512e-05, "loss": 0.8811, "step": 882 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999441271040774e-05, "loss": 0.8545, "step": 883 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999419988228134e-05, "loss": 0.8546, "step": 884 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999398307621436e-05, "loss": 0.8986, "step": 885 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999376229221547e-05, "loss": 0.6647, "step": 886 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999353753029334e-05, "loss": 0.8343, "step": 887 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999330879045706e-05, "loss": 0.9235, "step": 888 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999307607271567e-05, "loss": 0.8431, "step": 889 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999283937707835e-05, "loss": 0.8355, "step": 890 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999259870355462e-05, "loss": 0.7072, "step": 891 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.99992354052154e-05, "loss": 0.8504, "step": 892 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999210542288627e-05, "loss": 0.8955, "step": 893 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999185281576126e-05, "loss": 0.9156, "step": 894 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999159623078904e-05, "loss": 0.8201, "step": 895 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999133566797985e-05, "loss": 0.9243, "step": 896 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999107112734402e-05, "loss": 0.7917, "step": 897 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999908026088921e-05, "loss": 0.8578, "step": 898 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9999053011263473e-05, "loss": 0.8454, "step": 899 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999902536385828e-05, "loss": 1.0023, "step": 900 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999899731867473e-05, "loss": 0.7764, "step": 901 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9998968875713935e-05, "loss": 0.8767, "step": 902 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999894003497703e-05, "loss": 1.0469, "step": 903 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999891079646516e-05, "loss": 0.9903, "step": 904 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9998881160179494e-05, "loss": 0.7718, "step": 905 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9998851126121205e-05, "loss": 0.9687, "step": 906 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9998820694291487e-05, "loss": 0.8478, "step": 907 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9998789864691554e-05, "loss": 0.7308, "step": 908 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.999875863732263e-05, "loss": 0.8497, "step": 909 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.9998727012185957e-05, "loss": 0.9439, "step": 910 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 1.99986949892828e-05, "loss": 1.0076, "step": 911 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998662568614425e-05, "loss": 1.032, "step": 912 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999862975018212e-05, "loss": 0.9422, "step": 913 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.99985965339872e-05, "loss": 0.7774, "step": 914 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998562920030983e-05, "loss": 0.8315, "step": 915 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.99985289083148e-05, "loss": 0.9256, "step": 916 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998494498840012e-05, "loss": 0.9526, "step": 917 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999845969160798e-05, "loss": 0.7886, "step": 918 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998424486620097e-05, "loss": 0.9146, "step": 919 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999838888387776e-05, "loss": 0.9528, "step": 920 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999835288338238e-05, "loss": 0.8793, "step": 921 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998316485135398e-05, "loss": 0.8527, "step": 922 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999827968913826e-05, "loss": 0.9029, "step": 923 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998242495392426e-05, "loss": 0.942, "step": 924 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998204903899377e-05, "loss": 0.9076, "step": 925 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999816691466061e-05, "loss": 0.8891, "step": 926 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998128527677634e-05, "loss": 0.811, "step": 927 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999808974295198e-05, "loss": 1.0059, "step": 928 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998050560485185e-05, "loss": 0.8204, "step": 929 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9998010980278814e-05, "loss": 1.0189, "step": 930 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997971002334434e-05, "loss": 0.8717, "step": 931 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997930626653646e-05, "loss": 0.7732, "step": 932 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997889853238047e-05, "loss": 0.89, "step": 933 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997848682089264e-05, "loss": 0.789, "step": 934 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997807113208934e-05, "loss": 0.7706, "step": 935 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997765146598707e-05, "loss": 0.9039, "step": 936 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997722782260257e-05, "loss": 0.882, "step": 937 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997680020195266e-05, "loss": 0.8633, "step": 938 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997636860405437e-05, "loss": 0.8092, "step": 939 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997593302892488e-05, "loss": 0.7828, "step": 940 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997549347658148e-05, "loss": 0.9112, "step": 941 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997504994704174e-05, "loss": 0.8657, "step": 942 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999746024403232e-05, "loss": 0.8875, "step": 943 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997415095644372e-05, "loss": 0.9031, "step": 944 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997369549542126e-05, "loss": 0.7671, "step": 945 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999732360572739e-05, "loss": 0.9869, "step": 946 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997277264201997e-05, "loss": 0.8515, "step": 947 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999723052496779e-05, "loss": 1.0781, "step": 948 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997183388026622e-05, "loss": 0.8765, "step": 949 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997135853380376e-05, "loss": 0.9252, "step": 950 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997087921030935e-05, "loss": 0.8083, "step": 951 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9997039590980215e-05, "loss": 1.0215, "step": 952 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996990863230136e-05, "loss": 0.9123, "step": 953 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996941737782628e-05, "loss": 0.8269, "step": 954 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999689221463966e-05, "loss": 0.7727, "step": 955 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996842293803186e-05, "loss": 0.9685, "step": 956 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996791975275206e-05, "loss": 0.86, "step": 957 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996741259057715e-05, "loss": 0.869, "step": 958 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999669014515273e-05, "loss": 0.7725, "step": 959 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996638633562286e-05, "loss": 0.7414, "step": 960 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996586724288433e-05, "loss": 0.9671, "step": 961 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996534417333232e-05, "loss": 0.7652, "step": 962 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999648171269877e-05, "loss": 0.8134, "step": 963 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996428610387138e-05, "loss": 0.9612, "step": 964 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999637511040045e-05, "loss": 0.842, "step": 965 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.999632121274084e-05, "loss": 0.8677, "step": 966 }, { "epoch": 0.17, "grad_norm": 0.0, "learning_rate": 1.9996266917410442e-05, "loss": 0.8256, "step": 967 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9996212224411428e-05, "loss": 0.8739, "step": 968 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9996157133745963e-05, "loss": 0.8176, "step": 969 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9996101645416242e-05, "loss": 0.9343, "step": 970 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9996045759424475e-05, "loss": 0.8429, "step": 971 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995989475772887e-05, "loss": 0.7874, "step": 972 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995932794463704e-05, "loss": 0.8674, "step": 973 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.99958757154992e-05, "loss": 0.9092, "step": 974 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995818238881633e-05, "loss": 0.9093, "step": 975 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995760364613294e-05, "loss": 0.8307, "step": 976 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999570209269648e-05, "loss": 0.9483, "step": 977 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995643423133516e-05, "loss": 0.8156, "step": 978 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995584355926733e-05, "loss": 0.9059, "step": 979 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995524891078482e-05, "loss": 0.7804, "step": 980 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995465028591122e-05, "loss": 0.9607, "step": 981 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995404768467046e-05, "loss": 0.8161, "step": 982 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995344110708645e-05, "loss": 0.862, "step": 983 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999528305531833e-05, "loss": 0.8649, "step": 984 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995221602298534e-05, "loss": 0.8936, "step": 985 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995159751651697e-05, "loss": 0.7444, "step": 986 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9995097503380286e-05, "loss": 0.833, "step": 987 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999503485748677e-05, "loss": 0.9352, "step": 988 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994971813973646e-05, "loss": 0.9975, "step": 989 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999490837284342e-05, "loss": 0.7614, "step": 990 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999484453409862e-05, "loss": 0.9195, "step": 991 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994780297741784e-05, "loss": 0.7767, "step": 992 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994715663775463e-05, "loss": 0.8014, "step": 993 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994650632202227e-05, "loss": 0.7439, "step": 994 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994585203024672e-05, "loss": 0.6594, "step": 995 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994519376245393e-05, "loss": 0.8149, "step": 996 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994453151867015e-05, "loss": 0.827, "step": 997 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994386529892168e-05, "loss": 0.9875, "step": 998 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.99943195103235e-05, "loss": 0.8296, "step": 999 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994252093163685e-05, "loss": 0.7062, "step": 1000 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994184278415402e-05, "loss": 0.9255, "step": 1001 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994116066081342e-05, "loss": 0.7893, "step": 1002 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9994047456164228e-05, "loss": 0.851, "step": 1003 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999397844866678e-05, "loss": 0.8362, "step": 1004 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993909043591754e-05, "loss": 0.8794, "step": 1005 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993839240941905e-05, "loss": 0.969, "step": 1006 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999376904072001e-05, "loss": 0.8847, "step": 1007 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993698442928862e-05, "loss": 0.9265, "step": 1008 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993627447571273e-05, "loss": 0.8015, "step": 1009 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999355605465006e-05, "loss": 0.8171, "step": 1010 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999348426416807e-05, "loss": 0.8518, "step": 1011 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993412076128154e-05, "loss": 0.8254, "step": 1012 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993339490533182e-05, "loss": 0.8676, "step": 1013 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999326650738605e-05, "loss": 1.063, "step": 1014 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999319312668966e-05, "loss": 0.9978, "step": 1015 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993119348446927e-05, "loss": 0.9828, "step": 1016 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9993045172660785e-05, "loss": 0.9079, "step": 1017 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.999297059933419e-05, "loss": 0.8314, "step": 1018 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9992895628470098e-05, "loss": 0.9078, "step": 1019 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9992820260071503e-05, "loss": 0.7774, "step": 1020 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.99927444941414e-05, "loss": 0.9265, "step": 1021 }, { "epoch": 0.18, "grad_norm": 0.0, "learning_rate": 1.9992668330682806e-05, "loss": 0.9399, "step": 1022 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992591769698745e-05, "loss": 0.8116, "step": 1023 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992514811192263e-05, "loss": 0.8846, "step": 1024 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992437455166425e-05, "loss": 0.8075, "step": 1025 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992359701624305e-05, "loss": 0.8799, "step": 1026 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992281550569003e-05, "loss": 1.0095, "step": 1027 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992203002003622e-05, "loss": 0.7413, "step": 1028 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992124055931287e-05, "loss": 0.7714, "step": 1029 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9992044712355137e-05, "loss": 0.8832, "step": 1030 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999196497127833e-05, "loss": 1.0046, "step": 1031 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991884832704044e-05, "loss": 0.8512, "step": 1032 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991804296635463e-05, "loss": 0.7675, "step": 1033 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991723363075787e-05, "loss": 0.8843, "step": 1034 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999164203202824e-05, "loss": 0.7857, "step": 1035 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999156030349606e-05, "loss": 0.8758, "step": 1036 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999147817748249e-05, "loss": 0.8895, "step": 1037 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.99913956539908e-05, "loss": 0.7875, "step": 1038 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991312733024277e-05, "loss": 1.0422, "step": 1039 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991229414586217e-05, "loss": 0.7284, "step": 1040 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991145698679937e-05, "loss": 0.939, "step": 1041 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9991061585308764e-05, "loss": 0.9359, "step": 1042 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990977074476045e-05, "loss": 0.9177, "step": 1043 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990892166185144e-05, "loss": 0.7641, "step": 1044 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990806860439437e-05, "loss": 0.6479, "step": 1045 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990721157242318e-05, "loss": 0.931, "step": 1046 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999063505659719e-05, "loss": 0.75, "step": 1047 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990548558507493e-05, "loss": 0.8663, "step": 1048 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990461662976656e-05, "loss": 0.8474, "step": 1049 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990374370008142e-05, "loss": 0.9204, "step": 1050 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999028667960542e-05, "loss": 0.8899, "step": 1051 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990198591771977e-05, "loss": 0.847, "step": 1052 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.999011010651132e-05, "loss": 0.8835, "step": 1053 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9990021223826973e-05, "loss": 0.7803, "step": 1054 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989931943722464e-05, "loss": 0.8464, "step": 1055 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998984226620135e-05, "loss": 0.7971, "step": 1056 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989752191267197e-05, "loss": 0.8254, "step": 1057 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989661718923587e-05, "loss": 0.8027, "step": 1058 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998957084917412e-05, "loss": 0.8458, "step": 1059 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989479582022415e-05, "loss": 0.762, "step": 1060 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989387917472102e-05, "loss": 0.7873, "step": 1061 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998929585552682e-05, "loss": 0.8996, "step": 1062 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989203396190235e-05, "loss": 1.0377, "step": 1063 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998911053946603e-05, "loss": 0.8441, "step": 1064 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9989017285357893e-05, "loss": 1.0018, "step": 1065 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988923633869538e-05, "loss": 0.9535, "step": 1066 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988829585004684e-05, "loss": 0.9789, "step": 1067 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988735138767083e-05, "loss": 0.8234, "step": 1068 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988640295160485e-05, "loss": 0.9189, "step": 1069 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988545054188665e-05, "loss": 0.8436, "step": 1070 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998844941585541e-05, "loss": 0.8417, "step": 1071 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998835338016453e-05, "loss": 0.9121, "step": 1072 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988256947119837e-05, "loss": 0.7792, "step": 1073 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988160116725172e-05, "loss": 0.8446, "step": 1074 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9988062888984387e-05, "loss": 0.8353, "step": 1075 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.998796526390135e-05, "loss": 0.8431, "step": 1076 }, { "epoch": 0.19, "grad_norm": 0.0, "learning_rate": 1.9987867241479947e-05, "loss": 0.8461, "step": 1077 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9987768821724075e-05, "loss": 0.8762, "step": 1078 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9987670004637647e-05, "loss": 0.9354, "step": 1079 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.99875707902246e-05, "loss": 0.8729, "step": 1080 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9987471178488872e-05, "loss": 0.7681, "step": 1081 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998737116943443e-05, "loss": 0.8501, "step": 1082 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998727076306526e-05, "loss": 0.8146, "step": 1083 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9987169959385345e-05, "loss": 0.8669, "step": 1084 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9987068758398704e-05, "loss": 0.9079, "step": 1085 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986967160109356e-05, "loss": 0.798, "step": 1086 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986865164521348e-05, "loss": 0.8618, "step": 1087 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986762771638734e-05, "loss": 0.9674, "step": 1088 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986659981465588e-05, "loss": 0.8201, "step": 1089 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986556794005997e-05, "loss": 0.6824, "step": 1090 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986453209264074e-05, "loss": 0.8975, "step": 1091 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986349227243933e-05, "loss": 0.9116, "step": 1092 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998624484794971e-05, "loss": 0.8542, "step": 1093 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986140071385566e-05, "loss": 0.7192, "step": 1094 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9986034897555658e-05, "loss": 0.9441, "step": 1095 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9985929326464174e-05, "loss": 0.7772, "step": 1096 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9985823358115315e-05, "loss": 0.8554, "step": 1097 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9985716992513297e-05, "loss": 0.792, "step": 1098 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998561022966235e-05, "loss": 0.8937, "step": 1099 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998550306956672e-05, "loss": 0.8656, "step": 1100 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9985395512230676e-05, "loss": 0.8838, "step": 1101 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998528755765849e-05, "loss": 0.9366, "step": 1102 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998517920585446e-05, "loss": 0.9077, "step": 1103 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9985070456822894e-05, "loss": 0.9482, "step": 1104 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998496131056812e-05, "loss": 0.867, "step": 1105 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984851767094482e-05, "loss": 0.9542, "step": 1106 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984741826406337e-05, "loss": 0.8249, "step": 1107 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998463148850805e-05, "loss": 0.8136, "step": 1108 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984520753404025e-05, "loss": 0.8972, "step": 1109 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984409621098658e-05, "loss": 0.8765, "step": 1110 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984298091596373e-05, "loss": 0.8657, "step": 1111 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984186164901603e-05, "loss": 0.8353, "step": 1112 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9984073841018807e-05, "loss": 0.8149, "step": 1113 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998396111995245e-05, "loss": 0.9595, "step": 1114 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998384800170701e-05, "loss": 0.7405, "step": 1115 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983734486287e-05, "loss": 0.7447, "step": 1116 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983620573696927e-05, "loss": 0.9423, "step": 1117 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983506263941327e-05, "loss": 0.8698, "step": 1118 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983391557024737e-05, "loss": 0.9814, "step": 1119 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983276452951736e-05, "loss": 0.875, "step": 1120 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983160951726893e-05, "loss": 0.8511, "step": 1121 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9983045053354807e-05, "loss": 0.85, "step": 1122 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9982928757840084e-05, "loss": 0.9517, "step": 1123 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9982812065187355e-05, "loss": 0.7407, "step": 1124 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998269497540126e-05, "loss": 0.7764, "step": 1125 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9982577488486457e-05, "loss": 0.9776, "step": 1126 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9982459604447622e-05, "loss": 0.8558, "step": 1127 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998234132328944e-05, "loss": 0.9259, "step": 1128 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9982222645016623e-05, "loss": 0.9923, "step": 1129 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9982103569633887e-05, "loss": 0.8029, "step": 1130 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.998198409714597e-05, "loss": 0.9713, "step": 1131 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9981864227557628e-05, "loss": 0.8341, "step": 1132 }, { "epoch": 0.2, "grad_norm": 0.0, "learning_rate": 1.9981743960873626e-05, "loss": 0.8766, "step": 1133 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.998162329709875e-05, "loss": 0.7723, "step": 1134 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.99815022362378e-05, "loss": 0.9265, "step": 1135 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.998138077829559e-05, "loss": 0.8446, "step": 1136 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9981258923276954e-05, "loss": 0.6741, "step": 1137 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9981136671186738e-05, "loss": 0.9105, "step": 1138 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.998101402202981e-05, "loss": 0.8281, "step": 1139 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9980890975811042e-05, "loss": 0.8524, "step": 1140 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9980767532535336e-05, "loss": 0.8888, "step": 1141 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9980643692207597e-05, "loss": 1.0167, "step": 1142 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9980519454832757e-05, "loss": 0.8276, "step": 1143 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9980394820415753e-05, "loss": 0.9306, "step": 1144 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.998026978896155e-05, "loss": 0.8501, "step": 1145 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.998014436047511e-05, "loss": 0.7296, "step": 1146 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9980018534961436e-05, "loss": 0.9217, "step": 1147 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9979892312425527e-05, "loss": 0.8118, "step": 1148 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9979765692872404e-05, "loss": 0.8904, "step": 1149 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9979638676307106e-05, "loss": 0.8925, "step": 1150 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9979511262734686e-05, "loss": 0.7798, "step": 1151 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997938345216021e-05, "loss": 1.0262, "step": 1152 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997925524458877e-05, "loss": 0.7825, "step": 1153 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9979126640025455e-05, "loss": 0.8592, "step": 1154 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978997638475392e-05, "loss": 0.7866, "step": 1155 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978868239943704e-05, "loss": 0.9926, "step": 1156 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978738444435544e-05, "loss": 0.8232, "step": 1157 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978608251956073e-05, "loss": 0.8122, "step": 1158 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978477662510474e-05, "loss": 0.7995, "step": 1159 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997834667610394e-05, "loss": 1.1027, "step": 1160 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978215292741677e-05, "loss": 0.89, "step": 1161 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9978083512428917e-05, "loss": 0.9043, "step": 1162 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9977951335170903e-05, "loss": 0.6761, "step": 1163 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9977818760972895e-05, "loss": 0.8778, "step": 1164 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997768578984016e-05, "loss": 0.8145, "step": 1165 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997755242177799e-05, "loss": 0.8916, "step": 1166 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9977418656791697e-05, "loss": 0.7988, "step": 1167 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9977284494886594e-05, "loss": 0.8946, "step": 1168 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9977149936068022e-05, "loss": 0.9676, "step": 1169 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9977014980341334e-05, "loss": 0.9445, "step": 1170 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.99768796277119e-05, "loss": 0.9118, "step": 1171 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9976743878185104e-05, "loss": 0.8962, "step": 1172 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9976607731766343e-05, "loss": 0.9326, "step": 1173 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9976471188461034e-05, "loss": 0.8491, "step": 1174 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9976334248274613e-05, "loss": 0.8607, "step": 1175 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997619691121252e-05, "loss": 0.9526, "step": 1176 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9976059177280233e-05, "loss": 1.0367, "step": 1177 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9975921046483217e-05, "loss": 1.0035, "step": 1178 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9975782518826972e-05, "loss": 0.893, "step": 1179 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9975643594317008e-05, "loss": 1.108, "step": 1180 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9975504272958854e-05, "loss": 0.7465, "step": 1181 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997536455475805e-05, "loss": 0.7833, "step": 1182 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9975224439720157e-05, "loss": 0.7777, "step": 1183 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9975083927850747e-05, "loss": 0.8441, "step": 1184 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9974943019155408e-05, "loss": 0.7511, "step": 1185 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9974801713639752e-05, "loss": 0.9883, "step": 1186 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.9974660011309392e-05, "loss": 0.7997, "step": 1187 }, { "epoch": 0.21, "grad_norm": 0.0, "learning_rate": 1.997451791216997e-05, "loss": 0.8232, "step": 1188 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997437541622714e-05, "loss": 0.8514, "step": 1189 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997423252348657e-05, "loss": 0.8226, "step": 1190 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9974089233953938e-05, "loss": 0.7506, "step": 1191 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997394554763495e-05, "loss": 0.8887, "step": 1192 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9973801464535326e-05, "loss": 0.9596, "step": 1193 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9973656984660793e-05, "loss": 0.812, "step": 1194 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9973512108017098e-05, "loss": 0.8317, "step": 1195 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9973366834610008e-05, "loss": 0.894, "step": 1196 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9973221164445295e-05, "loss": 0.814, "step": 1197 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997307509752876e-05, "loss": 0.9468, "step": 1198 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972928633866216e-05, "loss": 0.7546, "step": 1199 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972781773463488e-05, "loss": 0.8883, "step": 1200 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972634516326415e-05, "loss": 0.8891, "step": 1201 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972486862460853e-05, "loss": 0.7264, "step": 1202 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972338811872683e-05, "loss": 0.8912, "step": 1203 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972190364567793e-05, "loss": 0.9186, "step": 1204 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9972041520552087e-05, "loss": 0.9562, "step": 1205 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9971892279831482e-05, "loss": 0.8458, "step": 1206 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997174264241192e-05, "loss": 0.9751, "step": 1207 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9971592608299356e-05, "loss": 0.8044, "step": 1208 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9971442177499757e-05, "loss": 0.719, "step": 1209 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.99712913500191e-05, "loss": 0.9323, "step": 1210 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9971140125863396e-05, "loss": 0.9498, "step": 1211 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9970988505038654e-05, "loss": 0.8353, "step": 1212 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997083648755091e-05, "loss": 0.8273, "step": 1213 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997068407340621e-05, "loss": 0.956, "step": 1214 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9970531262610618e-05, "loss": 0.8656, "step": 1215 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.997037805517021e-05, "loss": 0.9821, "step": 1216 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9970224451091082e-05, "loss": 0.9328, "step": 1217 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9970070450379343e-05, "loss": 0.8265, "step": 1218 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9969916053041126e-05, "loss": 0.7207, "step": 1219 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996976125908257e-05, "loss": 0.8954, "step": 1220 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996960606850983e-05, "loss": 0.9565, "step": 1221 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9969450481329077e-05, "loss": 0.8307, "step": 1222 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9969294497546512e-05, "loss": 0.8604, "step": 1223 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9969138117168332e-05, "loss": 0.9865, "step": 1224 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996898134020076e-05, "loss": 0.8611, "step": 1225 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996882416665003e-05, "loss": 0.9839, "step": 1226 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.99686665965224e-05, "loss": 0.9017, "step": 1227 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996850862982413e-05, "loss": 0.8286, "step": 1228 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9968350266561516e-05, "loss": 1.0027, "step": 1229 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9968191506740847e-05, "loss": 0.7029, "step": 1230 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9968032350368447e-05, "loss": 0.7273, "step": 1231 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9967872797450643e-05, "loss": 0.766, "step": 1232 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9967712847993782e-05, "loss": 0.8394, "step": 1233 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996755250200423e-05, "loss": 0.8996, "step": 1234 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9967391759488365e-05, "loss": 0.7917, "step": 1235 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996723062045258e-05, "loss": 0.8129, "step": 1236 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9967069084903282e-05, "loss": 0.868, "step": 1237 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9966907152846905e-05, "loss": 0.9427, "step": 1238 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9966744824289886e-05, "loss": 0.8618, "step": 1239 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9966582099238682e-05, "loss": 0.7787, "step": 1240 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996641897769977e-05, "loss": 0.8551, "step": 1241 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.996625545967964e-05, "loss": 0.8782, "step": 1242 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 1.9966091545184795e-05, "loss": 0.8614, "step": 1243 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9965927234221752e-05, "loss": 0.7363, "step": 1244 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996576252679705e-05, "loss": 0.8622, "step": 1245 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9965597422917248e-05, "loss": 0.9212, "step": 1246 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9965431922588905e-05, "loss": 0.8684, "step": 1247 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9965266025818608e-05, "loss": 0.8242, "step": 1248 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996509973261296e-05, "loss": 0.7406, "step": 1249 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996493304297857e-05, "loss": 0.9225, "step": 1250 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9964765956922075e-05, "loss": 0.776, "step": 1251 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996459847445012e-05, "loss": 0.865, "step": 1252 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996443059556936e-05, "loss": 0.7086, "step": 1253 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996426232028649e-05, "loss": 0.8633, "step": 1254 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9964093648608193e-05, "loss": 0.9675, "step": 1255 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9963924580541182e-05, "loss": 0.8719, "step": 1256 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9963755116092178e-05, "loss": 0.981, "step": 1257 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9963585255267926e-05, "loss": 0.8778, "step": 1258 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996341499807519e-05, "loss": 0.9323, "step": 1259 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9963244344520737e-05, "loss": 0.8622, "step": 1260 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996307329461135e-05, "loss": 0.7198, "step": 1261 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9962901848353842e-05, "loss": 0.826, "step": 1262 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9962730005755033e-05, "loss": 0.8096, "step": 1263 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9962557766821756e-05, "loss": 0.9278, "step": 1264 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9962385131560865e-05, "loss": 0.8462, "step": 1265 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9962212099979225e-05, "loss": 0.8189, "step": 1266 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9962038672083723e-05, "loss": 0.756, "step": 1267 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9961864847881255e-05, "loss": 0.8568, "step": 1268 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996169062737874e-05, "loss": 0.8445, "step": 1269 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9961516010583106e-05, "loss": 1.0777, "step": 1270 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.99613409975013e-05, "loss": 0.8494, "step": 1271 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9961165588140284e-05, "loss": 0.8277, "step": 1272 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9960989782507035e-05, "loss": 0.8158, "step": 1273 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9960813580608552e-05, "loss": 0.9102, "step": 1274 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.996063698245184e-05, "loss": 0.7773, "step": 1275 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9960459988043924e-05, "loss": 0.7772, "step": 1276 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9960282597391843e-05, "loss": 0.71, "step": 1277 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9960104810502662e-05, "loss": 0.7952, "step": 1278 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995992662738345e-05, "loss": 0.8312, "step": 1279 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9959748048041292e-05, "loss": 0.9735, "step": 1280 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9959569072483296e-05, "loss": 0.7951, "step": 1281 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9959389700716578e-05, "loss": 0.8876, "step": 1282 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995920993274828e-05, "loss": 0.8785, "step": 1283 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9959029768585546e-05, "loss": 0.8108, "step": 1284 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995884920823555e-05, "loss": 0.8212, "step": 1285 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995866825170547e-05, "loss": 0.803, "step": 1286 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995848689900251e-05, "loss": 0.7466, "step": 1287 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9958305150133878e-05, "loss": 0.787, "step": 1288 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995812300510681e-05, "loss": 0.8161, "step": 1289 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9957940463928548e-05, "loss": 0.8089, "step": 1290 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9957757526606353e-05, "loss": 0.8264, "step": 1291 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.995757419314751e-05, "loss": 0.9549, "step": 1292 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.99573904635593e-05, "loss": 0.9278, "step": 1293 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9957206337849048e-05, "loss": 0.9219, "step": 1294 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9957021816024067e-05, "loss": 0.8568, "step": 1295 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.99568368980917e-05, "loss": 0.8789, "step": 1296 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9956651584059305e-05, "loss": 1.0579, "step": 1297 }, { "epoch": 0.23, "grad_norm": 0.0, "learning_rate": 1.9956465873934255e-05, "loss": 1.0255, "step": 1298 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995627976772393e-05, "loss": 0.9362, "step": 1299 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995609326543575e-05, "loss": 0.9039, "step": 1300 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9955906367077116e-05, "loss": 0.8358, "step": 1301 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9955719072655472e-05, "loss": 0.8901, "step": 1302 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995553138217827e-05, "loss": 0.8573, "step": 1303 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9955343295652977e-05, "loss": 0.8808, "step": 1304 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995515481308707e-05, "loss": 0.8213, "step": 1305 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995496593448805e-05, "loss": 0.954, "step": 1306 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995477665986343e-05, "loss": 0.8507, "step": 1307 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9954586989220748e-05, "loss": 0.7162, "step": 1308 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9954396922567536e-05, "loss": 0.8335, "step": 1309 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9954206459911364e-05, "loss": 0.7958, "step": 1310 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9954015601259805e-05, "loss": 1.0035, "step": 1311 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9953824346620455e-05, "loss": 0.7897, "step": 1312 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995363269600092e-05, "loss": 0.8584, "step": 1313 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9953440649408824e-05, "loss": 0.8594, "step": 1314 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995324820685181e-05, "loss": 0.8891, "step": 1315 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995305536833753e-05, "loss": 0.9497, "step": 1316 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9952862133873655e-05, "loss": 0.851, "step": 1317 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995266850346788e-05, "loss": 0.7846, "step": 1318 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9952474477127897e-05, "loss": 0.7925, "step": 1319 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9952280054861427e-05, "loss": 0.8527, "step": 1320 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9952085236676212e-05, "loss": 0.7487, "step": 1321 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9951890022579998e-05, "loss": 0.8379, "step": 1322 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9951694412580547e-05, "loss": 0.9127, "step": 1323 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995149840668565e-05, "loss": 0.756, "step": 1324 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9951302004903093e-05, "loss": 0.7165, "step": 1325 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9951105207240695e-05, "loss": 0.7752, "step": 1326 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9950908013706285e-05, "loss": 0.7776, "step": 1327 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995071042430771e-05, "loss": 0.8116, "step": 1328 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9950512439052824e-05, "loss": 0.839, "step": 1329 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.995031405794951e-05, "loss": 0.7834, "step": 1330 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9950115281005655e-05, "loss": 0.8976, "step": 1331 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994991610822917e-05, "loss": 0.8161, "step": 1332 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994971653962798e-05, "loss": 0.9662, "step": 1333 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9949516575210015e-05, "loss": 0.76, "step": 1334 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994931621498324e-05, "loss": 0.8676, "step": 1335 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994911545895562e-05, "loss": 1.0584, "step": 1336 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9948914307135142e-05, "loss": 0.8102, "step": 1337 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9948712759529813e-05, "loss": 0.9031, "step": 1338 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9948510816147647e-05, "loss": 0.8304, "step": 1339 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994830847699668e-05, "loss": 0.856, "step": 1340 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9948105742084952e-05, "loss": 0.9072, "step": 1341 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994790261142054e-05, "loss": 0.8945, "step": 1342 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994769908501152e-05, "loss": 0.8019, "step": 1343 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9947495162865992e-05, "loss": 0.8899, "step": 1344 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9947290844992063e-05, "loss": 0.9414, "step": 1345 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9947086131397862e-05, "loss": 0.7764, "step": 1346 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9946881022091536e-05, "loss": 1.0067, "step": 1347 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9946675517081242e-05, "loss": 0.8104, "step": 1348 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9946469616375157e-05, "loss": 0.8571, "step": 1349 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9946263319981473e-05, "loss": 0.9451, "step": 1350 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9946056627908392e-05, "loss": 0.7624, "step": 1351 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.994584954016414e-05, "loss": 0.8314, "step": 1352 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9945642056756956e-05, "loss": 0.9589, "step": 1353 }, { "epoch": 0.24, "grad_norm": 0.0, "learning_rate": 1.9945434177695095e-05, "loss": 0.8187, "step": 1354 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9945225902986824e-05, "loss": 1.014, "step": 1355 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9945017232640428e-05, "loss": 0.8405, "step": 1356 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9944808166664213e-05, "loss": 0.7899, "step": 1357 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.994459870506649e-05, "loss": 1.0029, "step": 1358 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9944388847855595e-05, "loss": 0.8096, "step": 1359 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9944178595039877e-05, "loss": 0.9053, "step": 1360 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9943967946627695e-05, "loss": 0.8658, "step": 1361 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9943756902627436e-05, "loss": 0.8287, "step": 1362 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9943545463047494e-05, "loss": 0.9072, "step": 1363 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.994333362789628e-05, "loss": 0.7823, "step": 1364 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.994312139718222e-05, "loss": 0.8791, "step": 1365 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9942908770913756e-05, "loss": 0.8588, "step": 1366 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9942695749099344e-05, "loss": 0.9229, "step": 1367 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.994248233174747e-05, "loss": 0.8641, "step": 1368 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9942268518866613e-05, "loss": 0.8318, "step": 1369 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.994205431046528e-05, "loss": 0.8399, "step": 1370 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9941839706551997e-05, "loss": 0.8023, "step": 1371 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.99416247071353e-05, "loss": 0.7379, "step": 1372 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9941409312223737e-05, "loss": 0.9102, "step": 1373 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9941193521825885e-05, "loss": 0.8185, "step": 1374 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9940977335950324e-05, "loss": 0.9459, "step": 1375 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.994076075460566e-05, "loss": 0.8323, "step": 1376 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9940543777800493e-05, "loss": 0.7796, "step": 1377 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9940326405543472e-05, "loss": 1.0099, "step": 1378 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9940108637843237e-05, "loss": 0.8649, "step": 1379 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993989047470845e-05, "loss": 0.8726, "step": 1380 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9939671916147792e-05, "loss": 0.882, "step": 1381 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993945296216996e-05, "loss": 0.8923, "step": 1382 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9939233612783662e-05, "loss": 0.9683, "step": 1383 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993901386799762e-05, "loss": 0.9077, "step": 1384 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9938793727820585e-05, "loss": 0.8894, "step": 1385 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993857319226131e-05, "loss": 0.9676, "step": 1386 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993835226132856e-05, "loss": 0.8744, "step": 1387 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9938130935031136e-05, "loss": 0.9081, "step": 1388 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993790921337784e-05, "loss": 0.8403, "step": 1389 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993768709637749e-05, "loss": 0.8929, "step": 1390 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9937464584038926e-05, "loss": 0.8225, "step": 1391 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9937241676370992e-05, "loss": 0.9055, "step": 1392 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9937018373382564e-05, "loss": 0.836, "step": 1393 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993679467508252e-05, "loss": 0.78, "step": 1394 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9936570581479763e-05, "loss": 0.9117, "step": 1395 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9936346092583205e-05, "loss": 0.8913, "step": 1396 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993612120840178e-05, "loss": 0.7459, "step": 1397 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9935895928944428e-05, "loss": 0.9497, "step": 1398 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9935670254220115e-05, "loss": 0.9257, "step": 1399 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9935444184237817e-05, "loss": 0.913, "step": 1400 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993521771900653e-05, "loss": 0.8589, "step": 1401 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9934990858535266e-05, "loss": 0.9403, "step": 1402 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9934763602833043e-05, "loss": 0.7907, "step": 1403 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9934535951908907e-05, "loss": 0.8641, "step": 1404 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.993430790577191e-05, "loss": 0.822, "step": 1405 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9934079464431128e-05, "loss": 0.8228, "step": 1406 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9933850627895643e-05, "loss": 0.7651, "step": 1407 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9933621396174568e-05, "loss": 1.0123, "step": 1408 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 1.9933391769277013e-05, "loss": 0.943, "step": 1409 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9933161747212114e-05, "loss": 0.8053, "step": 1410 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.993293132998903e-05, "loss": 0.8837, "step": 1411 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9932700517616918e-05, "loss": 0.8736, "step": 1412 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9932469310104963e-05, "loss": 0.9238, "step": 1413 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9932237707462368e-05, "loss": 0.9268, "step": 1414 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9932005709698337e-05, "loss": 0.8251, "step": 1415 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.993177331682211e-05, "loss": 0.8341, "step": 1416 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9931540528842923e-05, "loss": 0.8237, "step": 1417 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.993130734577004e-05, "loss": 0.779, "step": 1418 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9931073767612735e-05, "loss": 0.9699, "step": 1419 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9930839794380308e-05, "loss": 0.8746, "step": 1420 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.993060542608206e-05, "loss": 0.9671, "step": 1421 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9930370662727315e-05, "loss": 0.8539, "step": 1422 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9930135504325412e-05, "loss": 0.8951, "step": 1423 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992989995088571e-05, "loss": 0.7618, "step": 1424 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9929664002417576e-05, "loss": 0.7514, "step": 1425 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9929427658930396e-05, "loss": 0.8369, "step": 1426 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9929190920433578e-05, "loss": 0.9826, "step": 1427 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992895378693653e-05, "loss": 0.827, "step": 1428 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9928716258448693e-05, "loss": 0.9059, "step": 1429 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9928478334979515e-05, "loss": 0.7443, "step": 1430 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9928240016538463e-05, "loss": 0.9367, "step": 1431 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9928001303135012e-05, "loss": 0.8758, "step": 1432 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992776219477866e-05, "loss": 0.9245, "step": 1433 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9927522691478923e-05, "loss": 0.7953, "step": 1434 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9927282793245327e-05, "loss": 0.9053, "step": 1435 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9927042500087418e-05, "loss": 0.8749, "step": 1436 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9926801812014748e-05, "loss": 0.7449, "step": 1437 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9926560729036898e-05, "loss": 0.9381, "step": 1438 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9926319251163454e-05, "loss": 0.8843, "step": 1439 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9926077378404027e-05, "loss": 0.9018, "step": 1440 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992583511076824e-05, "loss": 0.9954, "step": 1441 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992559244826573e-05, "loss": 0.8561, "step": 1442 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9925349390906143e-05, "loss": 0.9065, "step": 1443 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9925105938699155e-05, "loss": 0.8805, "step": 1444 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9924862091654453e-05, "loss": 0.8625, "step": 1445 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9924617849781734e-05, "loss": 0.7816, "step": 1446 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9924373213090716e-05, "loss": 0.7769, "step": 1447 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992412818159113e-05, "loss": 0.8258, "step": 1448 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992388275529272e-05, "loss": 0.7233, "step": 1449 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9923636934205257e-05, "loss": 0.9227, "step": 1450 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9923390718338518e-05, "loss": 0.8886, "step": 1451 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9923144107702295e-05, "loss": 0.9596, "step": 1452 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9922897102306398e-05, "loss": 0.9113, "step": 1453 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992264970216066e-05, "loss": 0.9295, "step": 1454 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9922401907274913e-05, "loss": 0.7423, "step": 1455 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9922153717659024e-05, "loss": 0.9216, "step": 1456 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992190513332286e-05, "loss": 0.7783, "step": 1457 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9921656154276313e-05, "loss": 0.7807, "step": 1458 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992140678052929e-05, "loss": 0.7545, "step": 1459 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9921157012091704e-05, "loss": 0.863, "step": 1460 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9920906848973504e-05, "loss": 0.7777, "step": 1461 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9920656291184625e-05, "loss": 0.8509, "step": 1462 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.992040533873505e-05, "loss": 0.757, "step": 1463 }, { "epoch": 0.26, "grad_norm": 0.0, "learning_rate": 1.9920153991634757e-05, "loss": 0.8175, "step": 1464 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9919902249893738e-05, "loss": 0.7765, "step": 1465 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9919650113522017e-05, "loss": 0.8324, "step": 1466 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9919397582529623e-05, "loss": 0.8521, "step": 1467 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9919144656926598e-05, "loss": 0.7975, "step": 1468 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9918891336723005e-05, "loss": 0.7621, "step": 1469 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9918637621928924e-05, "loss": 0.7054, "step": 1470 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991838351255445e-05, "loss": 0.908, "step": 1471 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9918129008609684e-05, "loss": 0.922, "step": 1472 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991787411010476e-05, "loss": 0.9078, "step": 1473 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991761881704981e-05, "loss": 0.888, "step": 1474 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9917363129454995e-05, "loss": 0.8418, "step": 1475 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9917107047330485e-05, "loss": 0.7146, "step": 1476 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9916850570686465e-05, "loss": 0.6907, "step": 1477 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9916593699533143e-05, "loss": 0.8247, "step": 1478 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9916336433880738e-05, "loss": 0.9212, "step": 1479 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9916078773739478e-05, "loss": 0.9328, "step": 1480 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991582071911962e-05, "loss": 0.7988, "step": 1481 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9915562270031426e-05, "loss": 0.8105, "step": 1482 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991530342648518e-05, "loss": 0.8863, "step": 1483 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9915044188491175e-05, "loss": 0.9975, "step": 1484 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991478455605973e-05, "loss": 1.1184, "step": 1485 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9914524529201167e-05, "loss": 1.0861, "step": 1486 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9914264107925833e-05, "loss": 0.8207, "step": 1487 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9914003292244094e-05, "loss": 0.8447, "step": 1488 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9913742082166316e-05, "loss": 0.693, "step": 1489 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9913480477702896e-05, "loss": 0.9221, "step": 1490 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991321847886424e-05, "loss": 1.004, "step": 1491 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9912956085660774e-05, "loss": 0.898, "step": 1492 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9912693298102926e-05, "loss": 0.7772, "step": 1493 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9912430116201165e-05, "loss": 0.9087, "step": 1494 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9912166539965946e-05, "loss": 0.9475, "step": 1495 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9911902569407765e-05, "loss": 0.737, "step": 1496 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991163820453712e-05, "loss": 0.9873, "step": 1497 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9911373445364526e-05, "loss": 0.8096, "step": 1498 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991110829190052e-05, "loss": 0.9147, "step": 1499 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9910842744155647e-05, "loss": 0.7302, "step": 1500 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991057680214047e-05, "loss": 0.8395, "step": 1501 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991031046586557e-05, "loss": 0.7222, "step": 1502 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.991004373534154e-05, "loss": 0.8691, "step": 1503 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9909776610578995e-05, "loss": 0.7959, "step": 1504 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9909509091588562e-05, "loss": 0.8611, "step": 1505 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990924117838088e-05, "loss": 0.8996, "step": 1506 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990897287096661e-05, "loss": 0.8134, "step": 1507 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9908704169356424e-05, "loss": 0.7365, "step": 1508 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9908435073561013e-05, "loss": 0.9253, "step": 1509 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990816558359108e-05, "loss": 0.7306, "step": 1510 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9907895699457344e-05, "loss": 0.9305, "step": 1511 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9907625421170542e-05, "loss": 0.7689, "step": 1512 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990735474874143e-05, "loss": 0.9033, "step": 1513 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9907083682180777e-05, "loss": 0.9183, "step": 1514 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990681222149936e-05, "loss": 0.9929, "step": 1515 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9906540366707982e-05, "loss": 0.8239, "step": 1516 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.9906268117817457e-05, "loss": 0.9348, "step": 1517 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990599547483862e-05, "loss": 0.7752, "step": 1518 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 1.990572243778231e-05, "loss": 0.8849, "step": 1519 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9905449006659388e-05, "loss": 0.9933, "step": 1520 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990517518148074e-05, "loss": 0.8636, "step": 1521 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9904900962257255e-05, "loss": 0.8967, "step": 1522 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990462634899984e-05, "loss": 0.7858, "step": 1523 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9904351341719425e-05, "loss": 0.7222, "step": 1524 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9904075940426942e-05, "loss": 0.8813, "step": 1525 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9903800145133353e-05, "loss": 0.7737, "step": 1526 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990352395584963e-05, "loss": 0.7423, "step": 1527 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9903247372586752e-05, "loss": 0.7014, "step": 1528 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9902970395355735e-05, "loss": 0.8497, "step": 1529 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990269302416759e-05, "loss": 0.9006, "step": 1530 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990241525903335e-05, "loss": 0.896, "step": 1531 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990213709996407e-05, "loss": 0.7387, "step": 1532 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990185854697081e-05, "loss": 0.7855, "step": 1533 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9901579600064654e-05, "loss": 0.9239, "step": 1534 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.99013002592567e-05, "loss": 0.9125, "step": 1535 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.990102052455806e-05, "loss": 0.8814, "step": 1536 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9900740395979863e-05, "loss": 0.764, "step": 1537 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9900459873533252e-05, "loss": 0.7613, "step": 1538 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9900178957229385e-05, "loss": 0.8755, "step": 1539 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9899897647079444e-05, "loss": 0.9207, "step": 1540 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989961594309461e-05, "loss": 0.9241, "step": 1541 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.98993338452861e-05, "loss": 0.9016, "step": 1542 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9899051353665126e-05, "loss": 0.7781, "step": 1543 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989876846824293e-05, "loss": 1.0182, "step": 1544 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989848518903077e-05, "loss": 0.785, "step": 1545 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9898201516039913e-05, "loss": 0.9173, "step": 1546 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989791744928164e-05, "loss": 0.9356, "step": 1547 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9897632988767257e-05, "loss": 0.757, "step": 1548 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9897348134508078e-05, "loss": 0.8061, "step": 1549 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9897062886515436e-05, "loss": 0.808, "step": 1550 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9896777244800676e-05, "loss": 0.9077, "step": 1551 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989649120937516e-05, "loss": 0.8825, "step": 1552 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9896204780250276e-05, "loss": 0.8346, "step": 1553 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989591795743741e-05, "loss": 0.8533, "step": 1554 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9895630740947975e-05, "loss": 0.7717, "step": 1555 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.98953431307934e-05, "loss": 0.8887, "step": 1556 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9895055126985122e-05, "loss": 0.8497, "step": 1557 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9894766729534595e-05, "loss": 0.8633, "step": 1558 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9894477938453302e-05, "loss": 0.8927, "step": 1559 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9894188753752726e-05, "loss": 0.7755, "step": 1560 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9893899175444372e-05, "loss": 0.8176, "step": 1561 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9893609203539757e-05, "loss": 0.8195, "step": 1562 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989331883805042e-05, "loss": 0.9073, "step": 1563 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9893028078987914e-05, "loss": 0.8611, "step": 1564 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.98927369263638e-05, "loss": 0.9232, "step": 1565 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9892445380189664e-05, "loss": 0.7768, "step": 1566 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9892153440477106e-05, "loss": 0.8265, "step": 1567 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9891861107237737e-05, "loss": 0.7812, "step": 1568 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.989156838048319e-05, "loss": 0.9009, "step": 1569 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9891275260225105e-05, "loss": 0.9681, "step": 1570 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9890981746475145e-05, "loss": 0.7394, "step": 1571 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9890687839244988e-05, "loss": 0.9553, "step": 1572 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9890393538546325e-05, "loss": 1.0221, "step": 1573 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9890098844390864e-05, "loss": 0.9869, "step": 1574 }, { "epoch": 0.28, "grad_norm": 0.0, "learning_rate": 1.9889803756790327e-05, "loss": 0.6439, "step": 1575 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9889508275756457e-05, "loss": 0.7841, "step": 1576 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9889212401301004e-05, "loss": 0.7241, "step": 1577 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9888916133435742e-05, "loss": 0.9234, "step": 1578 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.988861947217245e-05, "loss": 0.9251, "step": 1579 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9888322417522944e-05, "loss": 0.9168, "step": 1580 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9888024969499023e-05, "loss": 0.7724, "step": 1581 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9887727128112537e-05, "loss": 0.8712, "step": 1582 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9887428893375325e-05, "loss": 0.907, "step": 1583 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9887130265299254e-05, "loss": 0.9279, "step": 1584 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9886831243896204e-05, "loss": 0.925, "step": 1585 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9886531829178067e-05, "loss": 0.8613, "step": 1586 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9886232021156762e-05, "loss": 0.8001, "step": 1587 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.988593181984421e-05, "loss": 0.8607, "step": 1588 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9885631225252352e-05, "loss": 0.802, "step": 1589 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.988533023739315e-05, "loss": 0.9172, "step": 1590 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9885028856278573e-05, "loss": 0.8837, "step": 1591 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9884727081920622e-05, "loss": 1.0244, "step": 1592 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9884424914331288e-05, "loss": 0.9322, "step": 1593 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.98841223535226e-05, "loss": 0.7694, "step": 1594 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9883819399506592e-05, "loss": 0.8129, "step": 1595 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9883516052295317e-05, "loss": 0.935, "step": 1596 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.988321231190084e-05, "loss": 0.9016, "step": 1597 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9882908178335253e-05, "loss": 0.9155, "step": 1598 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9882603651610642e-05, "loss": 0.7938, "step": 1599 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9882298731739128e-05, "loss": 0.7826, "step": 1600 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9881993418732843e-05, "loss": 0.8026, "step": 1601 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.988168771260393e-05, "loss": 1.0094, "step": 1602 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9881381613364554e-05, "loss": 0.855, "step": 1603 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9881075121026886e-05, "loss": 1.0033, "step": 1604 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9880768235603126e-05, "loss": 0.8005, "step": 1605 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9880460957105473e-05, "loss": 0.9051, "step": 1606 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9880153285546165e-05, "loss": 0.7769, "step": 1607 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9879845220937427e-05, "loss": 0.8374, "step": 1608 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9879536763291526e-05, "loss": 0.8754, "step": 1609 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9879227912620722e-05, "loss": 0.7977, "step": 1610 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9878918668937314e-05, "loss": 0.8104, "step": 1611 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9878609032253596e-05, "loss": 0.8447, "step": 1612 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9878299002581885e-05, "loss": 0.8882, "step": 1613 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9877988579934516e-05, "loss": 0.972, "step": 1614 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9877677764323847e-05, "loss": 0.8883, "step": 1615 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9877366555762228e-05, "loss": 0.8801, "step": 1616 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.987705495426205e-05, "loss": 0.7562, "step": 1617 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9876742959835703e-05, "loss": 0.8047, "step": 1618 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9876430572495604e-05, "loss": 0.8305, "step": 1619 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9876117792254175e-05, "loss": 0.8766, "step": 1620 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9875804619123864e-05, "loss": 0.9769, "step": 1621 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9875491053117123e-05, "loss": 0.7607, "step": 1622 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9875177094246435e-05, "loss": 0.8411, "step": 1623 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9874862742524282e-05, "loss": 0.7839, "step": 1624 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9874547997963176e-05, "loss": 0.8367, "step": 1625 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9874232860575633e-05, "loss": 0.7984, "step": 1626 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.987391733037419e-05, "loss": 0.7775, "step": 1627 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.98736014073714e-05, "loss": 0.8257, "step": 1628 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9873285091579834e-05, "loss": 0.7581, "step": 1629 }, { "epoch": 0.29, "grad_norm": 0.0, "learning_rate": 1.9872968383012073e-05, "loss": 0.8579, "step": 1630 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.987265128168071e-05, "loss": 0.8387, "step": 1631 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9872333787598376e-05, "loss": 0.8515, "step": 1632 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.987201590077769e-05, "loss": 0.8959, "step": 1633 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9871697621231294e-05, "loss": 0.9414, "step": 1634 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.987137894897186e-05, "loss": 0.8911, "step": 1635 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.987105988401206e-05, "loss": 0.8276, "step": 1636 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9870740426364587e-05, "loss": 0.8152, "step": 1637 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.987042057604215e-05, "loss": 0.8168, "step": 1638 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9870100333057473e-05, "loss": 0.7755, "step": 1639 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9869779697423297e-05, "loss": 0.8552, "step": 1640 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9869458669152378e-05, "loss": 1.0335, "step": 1641 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9869137248257483e-05, "loss": 0.8531, "step": 1642 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9868815434751406e-05, "loss": 0.834, "step": 1643 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986849322864694e-05, "loss": 0.7815, "step": 1644 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986817062995691e-05, "loss": 0.7697, "step": 1645 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986784763869415e-05, "loss": 0.8755, "step": 1646 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.98675242548715e-05, "loss": 0.7922, "step": 1647 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9867200478501836e-05, "loss": 0.6549, "step": 1648 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9866876309598032e-05, "loss": 0.8717, "step": 1649 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9866551748172987e-05, "loss": 0.9453, "step": 1650 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986622679423961e-05, "loss": 0.6957, "step": 1651 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986590144781083e-05, "loss": 0.8204, "step": 1652 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9865575708899587e-05, "loss": 0.7456, "step": 1653 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9865249577518842e-05, "loss": 0.9156, "step": 1654 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986492305368157e-05, "loss": 0.8565, "step": 1655 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9864596137400757e-05, "loss": 0.841, "step": 1656 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9864268828689413e-05, "loss": 0.9507, "step": 1657 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9863941127560557e-05, "loss": 0.7975, "step": 1658 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9863613034027224e-05, "loss": 0.9021, "step": 1659 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9863284548102467e-05, "loss": 0.7694, "step": 1660 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9862955669799356e-05, "loss": 0.8755, "step": 1661 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986262639913097e-05, "loss": 0.8942, "step": 1662 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9862296736110412e-05, "loss": 0.7605, "step": 1663 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9861966680750796e-05, "loss": 0.9268, "step": 1664 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9861636233065247e-05, "loss": 1.0061, "step": 1665 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9861305393066916e-05, "loss": 0.8402, "step": 1666 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986097416076896e-05, "loss": 0.8228, "step": 1667 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9860642536184564e-05, "loss": 0.922, "step": 1668 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.986031051932692e-05, "loss": 0.8374, "step": 1669 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9859978110209225e-05, "loss": 0.9336, "step": 1670 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9859645308844708e-05, "loss": 0.8747, "step": 1671 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9859312115246613e-05, "loss": 0.8094, "step": 1672 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9858978529428193e-05, "loss": 0.9607, "step": 1673 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9858644551402718e-05, "loss": 0.8301, "step": 1674 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.985831018118347e-05, "loss": 0.8003, "step": 1675 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.985797541878376e-05, "loss": 1.0015, "step": 1676 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9857640264216895e-05, "loss": 0.813, "step": 1677 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9857304717496217e-05, "loss": 0.9184, "step": 1678 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.985696877863507e-05, "loss": 0.9047, "step": 1679 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.985663244764682e-05, "loss": 0.9008, "step": 1680 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9856295724544842e-05, "loss": 0.9016, "step": 1681 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9855958609342542e-05, "loss": 0.8728, "step": 1682 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.985562110205332e-05, "loss": 0.8887, "step": 1683 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9855283202690606e-05, "loss": 0.9838, "step": 1684 }, { "epoch": 0.3, "grad_norm": 0.0, "learning_rate": 1.9854944911267842e-05, "loss": 0.7705, "step": 1685 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.985460622779849e-05, "loss": 0.9726, "step": 1686 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9854267152296023e-05, "loss": 0.836, "step": 1687 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9853927684773924e-05, "loss": 0.8308, "step": 1688 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9853587825245698e-05, "loss": 0.826, "step": 1689 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9853247573724875e-05, "loss": 0.8896, "step": 1690 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9852906930224977e-05, "loss": 0.8026, "step": 1691 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.985256589475957e-05, "loss": 0.8911, "step": 1692 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9852224467342207e-05, "loss": 0.9459, "step": 1693 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.985188264798648e-05, "loss": 0.9043, "step": 1694 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.985154043670598e-05, "loss": 0.7942, "step": 1695 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9851197833514328e-05, "loss": 0.8168, "step": 1696 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9850854838425152e-05, "loss": 0.8414, "step": 1697 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9850511451452094e-05, "loss": 0.8865, "step": 1698 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9850167672608814e-05, "loss": 0.7323, "step": 1699 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984982350190899e-05, "loss": 0.7614, "step": 1700 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9849478939366315e-05, "loss": 0.8828, "step": 1701 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9849133984994492e-05, "loss": 0.8663, "step": 1702 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9848788638807247e-05, "loss": 0.7979, "step": 1703 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984844290081832e-05, "loss": 0.7988, "step": 1704 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9848096771041464e-05, "loss": 0.7257, "step": 1705 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9847750249490444e-05, "loss": 0.7458, "step": 1706 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984740333617905e-05, "loss": 0.9428, "step": 1707 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9847056031121083e-05, "loss": 0.8536, "step": 1708 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9846708334330358e-05, "loss": 0.936, "step": 1709 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984636024582071e-05, "loss": 0.7702, "step": 1710 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984601176560598e-05, "loss": 0.7604, "step": 1711 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9845662893700032e-05, "loss": 0.8844, "step": 1712 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984531363011675e-05, "loss": 1.0279, "step": 1713 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984496397487003e-05, "loss": 0.858, "step": 1714 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984461392797377e-05, "loss": 0.8616, "step": 1715 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984426348944191e-05, "loss": 0.724, "step": 1716 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9843912659288384e-05, "loss": 0.8685, "step": 1717 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9843561437527145e-05, "loss": 0.9164, "step": 1718 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984320982417217e-05, "loss": 0.9896, "step": 1719 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9842857819237448e-05, "loss": 0.8009, "step": 1720 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.984250542273698e-05, "loss": 0.8896, "step": 1721 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9842152634684782e-05, "loss": 0.9102, "step": 1722 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9841799455094893e-05, "loss": 0.8, "step": 1723 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9841445883981364e-05, "loss": 0.8362, "step": 1724 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9841091921358256e-05, "loss": 0.78, "step": 1725 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9840737567239654e-05, "loss": 0.8574, "step": 1726 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9840382821639653e-05, "loss": 0.7593, "step": 1727 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9840027684572364e-05, "loss": 1.0259, "step": 1728 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9839672156051918e-05, "loss": 0.7795, "step": 1729 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.983931623609246e-05, "loss": 0.7547, "step": 1730 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9838959924708144e-05, "loss": 0.9974, "step": 1731 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9838603221913147e-05, "loss": 0.9671, "step": 1732 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9838246127721657e-05, "loss": 1.0668, "step": 1733 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9837888642147887e-05, "loss": 0.896, "step": 1734 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.983753076520605e-05, "loss": 0.9057, "step": 1735 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.983717249691039e-05, "loss": 0.9786, "step": 1736 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9836813837275157e-05, "loss": 0.8709, "step": 1737 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9836454786314613e-05, "loss": 0.749, "step": 1738 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.983609534404305e-05, "loss": 0.843, "step": 1739 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.983573551047476e-05, "loss": 0.8583, "step": 1740 }, { "epoch": 0.31, "grad_norm": 0.0, "learning_rate": 1.9835375285624065e-05, "loss": 0.918, "step": 1741 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9835014669505295e-05, "loss": 0.8001, "step": 1742 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9834653662132787e-05, "loss": 0.9448, "step": 1743 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9834292263520914e-05, "loss": 0.8132, "step": 1744 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9833930473684045e-05, "loss": 0.8903, "step": 1745 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9833568292636576e-05, "loss": 0.9345, "step": 1746 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9833205720392915e-05, "loss": 0.7816, "step": 1747 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9832842756967486e-05, "loss": 0.7435, "step": 1748 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9832479402374724e-05, "loss": 0.8485, "step": 1749 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.983211565662909e-05, "loss": 0.9733, "step": 1750 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.983175151974505e-05, "loss": 0.9297, "step": 1751 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9831386991737094e-05, "loss": 0.8289, "step": 1752 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.983102207261972e-05, "loss": 0.9164, "step": 1753 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9830656762407447e-05, "loss": 0.76, "step": 1754 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9830291061114807e-05, "loss": 0.7761, "step": 1755 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982992496875635e-05, "loss": 0.9202, "step": 1756 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9829558485346635e-05, "loss": 0.7217, "step": 1757 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9829191610900248e-05, "loss": 0.8911, "step": 1758 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9828824345431777e-05, "loss": 0.924, "step": 1759 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9828456688955838e-05, "loss": 1.0824, "step": 1760 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9828088641487055e-05, "loss": 0.851, "step": 1761 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9827720203040067e-05, "loss": 0.7511, "step": 1762 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9827351373629535e-05, "loss": 0.8588, "step": 1763 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982698215327013e-05, "loss": 0.9355, "step": 1764 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9826612541976542e-05, "loss": 0.9327, "step": 1765 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982624253976347e-05, "loss": 0.7986, "step": 1766 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982587214664564e-05, "loss": 0.8457, "step": 1767 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982550136263778e-05, "loss": 0.9696, "step": 1768 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9825130187754648e-05, "loss": 0.7468, "step": 1769 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9824758622011003e-05, "loss": 0.8658, "step": 1770 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982438666542163e-05, "loss": 0.9022, "step": 1771 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9824014318001326e-05, "loss": 0.8512, "step": 1772 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.98236415797649e-05, "loss": 0.9914, "step": 1773 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9823268450727186e-05, "loss": 0.7518, "step": 1774 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9822894930903022e-05, "loss": 0.9231, "step": 1775 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9822521020307272e-05, "loss": 0.8931, "step": 1776 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982214671895481e-05, "loss": 0.8074, "step": 1777 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9821772026860523e-05, "loss": 0.8926, "step": 1778 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982139694403932e-05, "loss": 0.7356, "step": 1779 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.982102147050612e-05, "loss": 0.7901, "step": 1780 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9820645606275864e-05, "loss": 0.8703, "step": 1781 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9820269351363503e-05, "loss": 0.8877, "step": 1782 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9819892705784004e-05, "loss": 0.8791, "step": 1783 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9819515669552348e-05, "loss": 0.7994, "step": 1784 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9819138242683537e-05, "loss": 0.9121, "step": 1785 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9818760425192587e-05, "loss": 0.8763, "step": 1786 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9818382217094528e-05, "loss": 0.8015, "step": 1787 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9818003618404403e-05, "loss": 0.7552, "step": 1788 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9817624629137273e-05, "loss": 0.9527, "step": 1789 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9817245249308217e-05, "loss": 0.8172, "step": 1790 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.981686547893233e-05, "loss": 0.8715, "step": 1791 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.981648531802472e-05, "loss": 0.8386, "step": 1792 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.98161047666005e-05, "loss": 0.8512, "step": 1793 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9815723824674822e-05, "loss": 0.865, "step": 1794 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.981534249226283e-05, "loss": 0.8477, "step": 1795 }, { "epoch": 0.32, "grad_norm": 0.0, "learning_rate": 1.9814960769379704e-05, "loss": 0.893, "step": 1796 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9814578656040622e-05, "loss": 0.7068, "step": 1797 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.981419615226079e-05, "loss": 1.022, "step": 1798 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.981381325805542e-05, "loss": 1.0387, "step": 1799 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9813429973439748e-05, "loss": 0.8024, "step": 1800 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9813046298429017e-05, "loss": 0.9061, "step": 1801 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9812662233038497e-05, "loss": 0.8081, "step": 1802 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9812277777283464e-05, "loss": 0.7511, "step": 1803 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.981189293117921e-05, "loss": 0.9379, "step": 1804 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9811507694741046e-05, "loss": 0.8358, "step": 1805 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.98111220679843e-05, "loss": 0.7331, "step": 1806 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9810736050924307e-05, "loss": 0.9643, "step": 1807 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.981034964357643e-05, "loss": 0.9029, "step": 1808 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9809962845956037e-05, "loss": 0.9771, "step": 1809 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980957565807852e-05, "loss": 0.8735, "step": 1810 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9809188079959274e-05, "loss": 0.9551, "step": 1811 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9808800111613724e-05, "loss": 0.969, "step": 1812 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9808411753057303e-05, "loss": 0.9144, "step": 1813 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9808023004305455e-05, "loss": 0.7919, "step": 1814 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9807633865373654e-05, "loss": 0.912, "step": 1815 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980724433627738e-05, "loss": 0.7942, "step": 1816 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980685441703212e-05, "loss": 0.9664, "step": 1817 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9806464107653394e-05, "loss": 0.8227, "step": 1818 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9806073408156723e-05, "loss": 0.7677, "step": 1819 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9805682318557656e-05, "loss": 0.6752, "step": 1820 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980529083887175e-05, "loss": 0.7781, "step": 1821 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980489896911457e-05, "loss": 0.8026, "step": 1822 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9804506709301717e-05, "loss": 0.8784, "step": 1823 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9804114059448795e-05, "loss": 0.7682, "step": 1824 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9803721019571413e-05, "loss": 0.9817, "step": 1825 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9803327589685216e-05, "loss": 0.776, "step": 1826 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9802933769805857e-05, "loss": 0.8084, "step": 1827 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9802539559948998e-05, "loss": 0.7727, "step": 1828 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980214496013032e-05, "loss": 0.779, "step": 1829 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9801749970365526e-05, "loss": 0.8893, "step": 1830 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9801354590670325e-05, "loss": 0.8209, "step": 1831 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980095882106045e-05, "loss": 0.8968, "step": 1832 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9800562661551642e-05, "loss": 0.8143, "step": 1833 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.980016611215966e-05, "loss": 0.9316, "step": 1834 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9799769172900283e-05, "loss": 0.7327, "step": 1835 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9799371843789302e-05, "loss": 0.8763, "step": 1836 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.979897412484252e-05, "loss": 0.9539, "step": 1837 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9798576016075762e-05, "loss": 0.8211, "step": 1838 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9798177517504866e-05, "loss": 0.8339, "step": 1839 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.979777862914568e-05, "loss": 0.8664, "step": 1840 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9797379351014076e-05, "loss": 1.0645, "step": 1841 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9796979683125936e-05, "loss": 0.8591, "step": 1842 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9796579625497164e-05, "loss": 0.8709, "step": 1843 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.979617917814367e-05, "loss": 0.8599, "step": 1844 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9795778341081385e-05, "loss": 0.8778, "step": 1845 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.979537711432626e-05, "loss": 0.9137, "step": 1846 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9794975497894255e-05, "loss": 0.7843, "step": 1847 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.979457349180134e-05, "loss": 0.893, "step": 1848 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9794171096063516e-05, "loss": 0.8745, "step": 1849 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9793768310696783e-05, "loss": 0.7585, "step": 1850 }, { "epoch": 0.33, "grad_norm": 0.0, "learning_rate": 1.9793365135717172e-05, "loss": 0.8597, "step": 1851 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9792961571140718e-05, "loss": 0.9358, "step": 1852 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9792557616983478e-05, "loss": 0.8918, "step": 1853 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.979215327326152e-05, "loss": 0.8257, "step": 1854 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.979174853999093e-05, "loss": 0.9007, "step": 1855 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.979134341718781e-05, "loss": 0.7653, "step": 1856 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.979093790486827e-05, "loss": 0.8249, "step": 1857 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9790532003048452e-05, "loss": 0.8137, "step": 1858 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.97901257117445e-05, "loss": 0.8418, "step": 1859 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9789719030972573e-05, "loss": 0.7798, "step": 1860 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9789311960748852e-05, "loss": 0.9971, "step": 1861 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9788904501089534e-05, "loss": 0.8596, "step": 1862 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9788496652010822e-05, "loss": 0.8528, "step": 1863 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9788088413528944e-05, "loss": 0.9426, "step": 1864 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9787679785660145e-05, "loss": 0.8107, "step": 1865 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9787270768420674e-05, "loss": 0.8406, "step": 1866 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9786861361826805e-05, "loss": 0.8956, "step": 1867 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9786451565894828e-05, "loss": 0.8373, "step": 1868 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978604138064104e-05, "loss": 0.6561, "step": 1869 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9785630806081765e-05, "loss": 0.8107, "step": 1870 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978521984223333e-05, "loss": 1.1331, "step": 1871 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9784808489112087e-05, "loss": 0.8472, "step": 1872 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.97843967467344e-05, "loss": 0.8509, "step": 1873 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978398461511665e-05, "loss": 0.8444, "step": 1874 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978357209427523e-05, "loss": 0.8547, "step": 1875 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978315918422655e-05, "loss": 0.8672, "step": 1876 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978274588498704e-05, "loss": 0.8704, "step": 1877 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978233219657314e-05, "loss": 0.8831, "step": 1878 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9781918119001306e-05, "loss": 0.8858, "step": 1879 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9781503652288013e-05, "loss": 0.843, "step": 1880 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.978108879644975e-05, "loss": 0.8098, "step": 1881 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9780673551503014e-05, "loss": 0.8322, "step": 1882 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9780257917464333e-05, "loss": 0.7709, "step": 1883 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977984189435024e-05, "loss": 0.9333, "step": 1884 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9779425482177277e-05, "loss": 0.8065, "step": 1885 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9779008680962018e-05, "loss": 0.763, "step": 1886 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9778591490721036e-05, "loss": 0.9571, "step": 1887 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977817391147094e-05, "loss": 0.81, "step": 1888 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977775594322833e-05, "loss": 0.9296, "step": 1889 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977733758600984e-05, "loss": 0.8808, "step": 1890 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9776918839832113e-05, "loss": 0.8165, "step": 1891 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9776499704711805e-05, "loss": 0.8057, "step": 1892 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9776080180665593e-05, "loss": 0.7786, "step": 1893 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977566026771016e-05, "loss": 0.9131, "step": 1894 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9775239965862217e-05, "loss": 1.0563, "step": 1895 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977481927513848e-05, "loss": 0.8082, "step": 1896 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977439819555569e-05, "loss": 0.8967, "step": 1897 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9773976727130594e-05, "loss": 0.9027, "step": 1898 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9773554869879957e-05, "loss": 0.8937, "step": 1899 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9773132623820568e-05, "loss": 0.9367, "step": 1900 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977270998896922e-05, "loss": 0.8528, "step": 1901 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9772286965342727e-05, "loss": 0.7758, "step": 1902 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9771863552957916e-05, "loss": 0.812, "step": 1903 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.9771439751831636e-05, "loss": 0.8795, "step": 1904 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977101556198074e-05, "loss": 0.8153, "step": 1905 }, { "epoch": 0.34, "grad_norm": 0.0, "learning_rate": 1.977059098342211e-05, "loss": 0.9014, "step": 1906 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.977016601617263e-05, "loss": 0.8654, "step": 1907 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.976974066024921e-05, "loss": 0.841, "step": 1908 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.976931491566877e-05, "loss": 0.906, "step": 1909 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9768888782448247e-05, "loss": 0.768, "step": 1910 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9768462260604592e-05, "loss": 0.8389, "step": 1911 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9768035350154774e-05, "loss": 0.6694, "step": 1912 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9767608051115777e-05, "loss": 0.9136, "step": 1913 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9767180363504597e-05, "loss": 0.8856, "step": 1914 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9766752287338252e-05, "loss": 0.7825, "step": 1915 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.976632382263377e-05, "loss": 0.8317, "step": 1916 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9765894969408195e-05, "loss": 0.9007, "step": 1917 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.976546572767859e-05, "loss": 0.835, "step": 1918 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9765036097462026e-05, "loss": 0.9547, "step": 1919 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.97646060787756e-05, "loss": 0.8467, "step": 1920 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9764175671636413e-05, "loss": 0.8636, "step": 1921 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9763744876061595e-05, "loss": 0.74, "step": 1922 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9763313692068276e-05, "loss": 0.7773, "step": 1923 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9762882119673617e-05, "loss": 0.7517, "step": 1924 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9762450158894778e-05, "loss": 0.7552, "step": 1925 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9762017809748954e-05, "loss": 0.7331, "step": 1926 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9761585072253333e-05, "loss": 0.9443, "step": 1927 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9761151946425134e-05, "loss": 0.8253, "step": 1928 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9760718432281592e-05, "loss": 0.881, "step": 1929 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.976028452983995e-05, "loss": 0.8267, "step": 1930 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9759850239117466e-05, "loss": 0.8101, "step": 1931 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9759415560131426e-05, "loss": 0.7938, "step": 1932 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9758980492899108e-05, "loss": 0.8528, "step": 1933 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9758545037437833e-05, "loss": 0.8291, "step": 1934 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.975810919376492e-05, "loss": 0.9002, "step": 1935 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9757672961897705e-05, "loss": 0.9379, "step": 1936 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9757236341853542e-05, "loss": 0.9484, "step": 1937 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9756799333649806e-05, "loss": 0.8743, "step": 1938 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9756361937303877e-05, "loss": 0.9623, "step": 1939 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9755924152833154e-05, "loss": 0.7453, "step": 1940 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9755485980255055e-05, "loss": 0.9337, "step": 1941 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.975504741958702e-05, "loss": 0.8464, "step": 1942 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9754608470846477e-05, "loss": 0.7391, "step": 1943 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9754169134050902e-05, "loss": 0.8241, "step": 1944 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.975372940921777e-05, "loss": 0.8637, "step": 1945 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9753289296364573e-05, "loss": 0.8447, "step": 1946 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.975284879550882e-05, "loss": 0.8543, "step": 1947 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.975240790666803e-05, "loss": 0.8015, "step": 1948 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.975196662985975e-05, "loss": 0.7125, "step": 1949 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9751524965101535e-05, "loss": 0.8187, "step": 1950 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9751082912410947e-05, "loss": 0.8799, "step": 1951 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9750640471805578e-05, "loss": 0.9003, "step": 1952 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9750197643303025e-05, "loss": 0.868, "step": 1953 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.974975442692091e-05, "loss": 0.7461, "step": 1954 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.974931082267686e-05, "loss": 0.7864, "step": 1955 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9748866830588525e-05, "loss": 0.835, "step": 1956 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9748422450673565e-05, "loss": 0.8927, "step": 1957 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.974797768294966e-05, "loss": 0.9251, "step": 1958 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9747532527434505e-05, "loss": 0.8852, "step": 1959 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9747086984145807e-05, "loss": 0.9949, "step": 1960 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.974664105310129e-05, "loss": 0.8044, "step": 1961 }, { "epoch": 0.35, "grad_norm": 0.0, "learning_rate": 1.9746194734318694e-05, "loss": 0.6902, "step": 1962 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.974574802781578e-05, "loss": 0.9237, "step": 1963 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9745300933610308e-05, "loss": 0.7409, "step": 1964 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9744853451720075e-05, "loss": 0.753, "step": 1965 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9744405582162875e-05, "loss": 0.7775, "step": 1966 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.974395732495653e-05, "loss": 0.7047, "step": 1967 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.974350868011887e-05, "loss": 0.839, "step": 1968 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9743059647667743e-05, "loss": 0.9149, "step": 1969 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.974261022762101e-05, "loss": 0.7685, "step": 1970 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9742160419996556e-05, "loss": 0.8032, "step": 1971 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.974171022481227e-05, "loss": 0.9056, "step": 1972 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9741259642086063e-05, "loss": 0.7517, "step": 1973 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.974080867183586e-05, "loss": 0.7457, "step": 1974 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.97403573140796e-05, "loss": 0.9047, "step": 1975 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9739905568835236e-05, "loss": 0.9788, "step": 1976 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973945343612075e-05, "loss": 0.8608, "step": 1977 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973900091595412e-05, "loss": 0.7679, "step": 1978 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9738548008353343e-05, "loss": 1.0158, "step": 1979 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973809471333645e-05, "loss": 0.7965, "step": 1980 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9737641030921467e-05, "loss": 0.8069, "step": 1981 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973718696112644e-05, "loss": 0.7318, "step": 1982 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9736732503969436e-05, "loss": 0.7957, "step": 1983 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9736277659468533e-05, "loss": 0.7764, "step": 1984 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9735822427641828e-05, "loss": 0.8698, "step": 1985 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9735366808507423e-05, "loss": 0.8863, "step": 1986 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973491080208345e-05, "loss": 0.8668, "step": 1987 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973445440838805e-05, "loss": 0.8491, "step": 1988 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973399762743938e-05, "loss": 0.8414, "step": 1989 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9733540459255605e-05, "loss": 0.9443, "step": 1990 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9733082903854923e-05, "loss": 0.8377, "step": 1991 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973262496125552e-05, "loss": 0.8797, "step": 1992 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973216663147563e-05, "loss": 0.8512, "step": 1993 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9731707914533476e-05, "loss": 0.8314, "step": 1994 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.973124881044731e-05, "loss": 0.8309, "step": 1995 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9730789319235395e-05, "loss": 0.8327, "step": 1996 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9730329440916013e-05, "loss": 0.875, "step": 1997 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9729869175507457e-05, "loss": 0.9013, "step": 1998 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9729408523028035e-05, "loss": 0.9298, "step": 1999 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9728947483496078e-05, "loss": 0.689, "step": 2000 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.972848605692992e-05, "loss": 0.9141, "step": 2001 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.972802424334792e-05, "loss": 0.9048, "step": 2002 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9727562042768452e-05, "loss": 0.7338, "step": 2003 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9727099455209906e-05, "loss": 0.9775, "step": 2004 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9726636480690676e-05, "loss": 0.751, "step": 2005 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9726173119229185e-05, "loss": 0.8969, "step": 2006 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9725709370843863e-05, "loss": 0.741, "step": 2007 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9725245235553166e-05, "loss": 0.8038, "step": 2008 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.972478071337555e-05, "loss": 0.9417, "step": 2009 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9724315804329498e-05, "loss": 0.8488, "step": 2010 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9723850508433508e-05, "loss": 0.8834, "step": 2011 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9723384825706082e-05, "loss": 0.8463, "step": 2012 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9722918756165753e-05, "loss": 0.8054, "step": 2013 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9722452299831057e-05, "loss": 0.7956, "step": 2014 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9721985456720556e-05, "loss": 0.8503, "step": 2015 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.9721518226852815e-05, "loss": 0.9191, "step": 2016 }, { "epoch": 0.36, "grad_norm": 0.0, "learning_rate": 1.972105061024643e-05, "loss": 0.8587, "step": 2017 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9720582606919998e-05, "loss": 0.8398, "step": 2018 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9720114216892137e-05, "loss": 0.8773, "step": 2019 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9719645440181477e-05, "loss": 0.8909, "step": 2020 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9719176276806674e-05, "loss": 0.9544, "step": 2021 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9718706726786393e-05, "loss": 0.7994, "step": 2022 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.97182367901393e-05, "loss": 0.8781, "step": 2023 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9717766466884106e-05, "loss": 0.8124, "step": 2024 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9717295757039513e-05, "loss": 0.9776, "step": 2025 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9716824660624248e-05, "loss": 0.8565, "step": 2026 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9716353177657054e-05, "loss": 0.7551, "step": 2027 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9715881308156685e-05, "loss": 1.0665, "step": 2028 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.971540905214191e-05, "loss": 0.8109, "step": 2029 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.971493640963152e-05, "loss": 0.9169, "step": 2030 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.971446338064432e-05, "loss": 0.8861, "step": 2031 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9713989965199123e-05, "loss": 0.9244, "step": 2032 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9713516163314766e-05, "loss": 0.783, "step": 2033 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9713041975010093e-05, "loss": 0.9797, "step": 2034 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.971256740030397e-05, "loss": 0.9352, "step": 2035 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.971209243921528e-05, "loss": 0.9066, "step": 2036 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9711617091762917e-05, "loss": 0.7446, "step": 2037 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9711141357965787e-05, "loss": 0.9906, "step": 2038 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9710665237842818e-05, "loss": 0.8385, "step": 2039 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.971018873141295e-05, "loss": 0.889, "step": 2040 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9709711838695137e-05, "loss": 0.8003, "step": 2041 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9709234559708357e-05, "loss": 0.8628, "step": 2042 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9708756894471593e-05, "loss": 0.7576, "step": 2043 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9708278843003848e-05, "loss": 0.8633, "step": 2044 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.970780040532414e-05, "loss": 0.8184, "step": 2045 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.97073215814515e-05, "loss": 0.746, "step": 2046 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9706842371404978e-05, "loss": 0.9369, "step": 2047 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9706362775203636e-05, "loss": 0.8667, "step": 2048 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9705882792866557e-05, "loss": 0.7557, "step": 2049 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9705402424412833e-05, "loss": 0.801, "step": 2050 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9704921669861575e-05, "loss": 0.9177, "step": 2051 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9704440529231905e-05, "loss": 0.7453, "step": 2052 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.970395900254297e-05, "loss": 0.9353, "step": 2053 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.970347708981392e-05, "loss": 0.9555, "step": 2054 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.970299479106393e-05, "loss": 0.6973, "step": 2055 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9702512106312182e-05, "loss": 0.8612, "step": 2056 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9702029035577882e-05, "loss": 0.7642, "step": 2057 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9701545578880247e-05, "loss": 0.8622, "step": 2058 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.970106173623851e-05, "loss": 0.9624, "step": 2059 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9700577507671917e-05, "loss": 1.0923, "step": 2060 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9700092893199732e-05, "loss": 0.8819, "step": 2061 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.969960789284124e-05, "loss": 0.817, "step": 2062 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9699122506615724e-05, "loss": 0.9279, "step": 2063 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.96986367345425e-05, "loss": 0.9074, "step": 2064 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9698150576640895e-05, "loss": 0.9792, "step": 2065 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9697664032930243e-05, "loss": 0.9284, "step": 2066 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9697177103429904e-05, "loss": 0.8634, "step": 2067 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9696689788159245e-05, "loss": 0.882, "step": 2068 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.969620208713766e-05, "loss": 0.7545, "step": 2069 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.9695714000384543e-05, "loss": 0.8203, "step": 2070 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.969522552791931e-05, "loss": 0.7675, "step": 2071 }, { "epoch": 0.37, "grad_norm": 0.0, "learning_rate": 1.96947366697614e-05, "loss": 0.8649, "step": 2072 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9694247425930257e-05, "loss": 0.8395, "step": 2073 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9693757796445345e-05, "loss": 0.8686, "step": 2074 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.969326778132614e-05, "loss": 0.9034, "step": 2075 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.969277738059214e-05, "loss": 0.8382, "step": 2076 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.969228659426285e-05, "loss": 0.9053, "step": 2077 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.969179542235779e-05, "loss": 0.7042, "step": 2078 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9691303864896514e-05, "loss": 0.8646, "step": 2079 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9690811921898562e-05, "loss": 0.8694, "step": 2080 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9690319593383512e-05, "loss": 0.8332, "step": 2081 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.968982687937095e-05, "loss": 0.8757, "step": 2082 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.968933377988047e-05, "loss": 0.8424, "step": 2083 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9688840294931698e-05, "loss": 0.8813, "step": 2084 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9688346424544262e-05, "loss": 0.8312, "step": 2085 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9687852168737803e-05, "loss": 1.0049, "step": 2086 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9687357527531997e-05, "loss": 0.8827, "step": 2087 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9686862500946505e-05, "loss": 0.9651, "step": 2088 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9686367089001033e-05, "loss": 0.8432, "step": 2089 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9685871291715282e-05, "loss": 0.9335, "step": 2090 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9685375109108978e-05, "loss": 0.8656, "step": 2091 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9684878541201864e-05, "loss": 0.8697, "step": 2092 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9684381588013686e-05, "loss": 0.9836, "step": 2093 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.968388424956422e-05, "loss": 0.8652, "step": 2094 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.968338652587325e-05, "loss": 0.8472, "step": 2095 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9682888416960572e-05, "loss": 0.8046, "step": 2096 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9682389922846007e-05, "loss": 0.8376, "step": 2097 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9681891043549387e-05, "loss": 0.8202, "step": 2098 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9681391779090554e-05, "loss": 0.8616, "step": 2099 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9680892129489367e-05, "loss": 0.9164, "step": 2100 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.968039209476571e-05, "loss": 0.9175, "step": 2101 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967989167493947e-05, "loss": 0.7682, "step": 2102 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967939087003056e-05, "loss": 0.983, "step": 2103 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9678889680058896e-05, "loss": 0.9879, "step": 2104 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9678388105044423e-05, "loss": 0.8538, "step": 2105 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967788614500709e-05, "loss": 0.6428, "step": 2106 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9677383799966866e-05, "loss": 0.7786, "step": 2107 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9676881069943738e-05, "loss": 0.7527, "step": 2108 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.96763779549577e-05, "loss": 0.7982, "step": 2109 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9675874455028774e-05, "loss": 0.7069, "step": 2110 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9675370570176985e-05, "loss": 0.9759, "step": 2111 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967486630042238e-05, "loss": 0.8814, "step": 2112 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967436164578502e-05, "loss": 0.8177, "step": 2113 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967385660628498e-05, "loss": 0.8917, "step": 2114 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967335118194235e-05, "loss": 0.7824, "step": 2115 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9672845372777244e-05, "loss": 0.9105, "step": 2116 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9672339178809775e-05, "loss": 0.8753, "step": 2117 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9671832600060083e-05, "loss": 0.8108, "step": 2118 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9671325636548324e-05, "loss": 0.949, "step": 2119 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967081828829466e-05, "loss": 0.7853, "step": 2120 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.967031055531928e-05, "loss": 0.6912, "step": 2121 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.966980243764238e-05, "loss": 0.8531, "step": 2122 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9669293935284174e-05, "loss": 0.9196, "step": 2123 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.966878504826489e-05, "loss": 0.8627, "step": 2124 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9668275776604774e-05, "loss": 0.8498, "step": 2125 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9667766120324085e-05, "loss": 0.8774, "step": 2126 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 1.9667256079443095e-05, "loss": 0.7213, "step": 2127 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.96667456539821e-05, "loss": 0.9425, "step": 2128 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.96662348439614e-05, "loss": 0.8239, "step": 2129 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9665723649401323e-05, "loss": 0.7907, "step": 2130 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.96652120703222e-05, "loss": 0.9044, "step": 2131 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.966470010674438e-05, "loss": 0.8537, "step": 2132 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9664187758688236e-05, "loss": 0.768, "step": 2133 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9663675026174146e-05, "loss": 0.7802, "step": 2134 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.966316190922251e-05, "loss": 0.8858, "step": 2135 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.966264840785374e-05, "loss": 0.9444, "step": 2136 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9662134522088258e-05, "loss": 0.7493, "step": 2137 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9661620251946518e-05, "loss": 0.9081, "step": 2138 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9661105597448974e-05, "loss": 0.8455, "step": 2139 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9660590558616096e-05, "loss": 1.0536, "step": 2140 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9660075135468375e-05, "loss": 0.849, "step": 2141 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9659559328026318e-05, "loss": 0.6391, "step": 2142 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9659043136310444e-05, "loss": 0.7886, "step": 2143 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9658526560341286e-05, "loss": 0.7974, "step": 2144 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9658009600139396e-05, "loss": 0.8633, "step": 2145 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.965749225572534e-05, "loss": 0.8547, "step": 2146 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9656974527119693e-05, "loss": 0.886, "step": 2147 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9656456414343063e-05, "loss": 0.8736, "step": 2148 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.965593791741605e-05, "loss": 0.8214, "step": 2149 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.965541903635929e-05, "loss": 0.8155, "step": 2150 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9654899771193413e-05, "loss": 0.8892, "step": 2151 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9654380121939088e-05, "loss": 0.8194, "step": 2152 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9653860088616982e-05, "loss": 0.8171, "step": 2153 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9653339671247783e-05, "loss": 0.8662, "step": 2154 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9652818869852196e-05, "loss": 0.7588, "step": 2155 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9652297684450936e-05, "loss": 0.9193, "step": 2156 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.965177611506474e-05, "loss": 0.8459, "step": 2157 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9651254161714358e-05, "loss": 0.9152, "step": 2158 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.965073182442055e-05, "loss": 0.9079, "step": 2159 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9650209103204095e-05, "loss": 0.8651, "step": 2160 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964968599808579e-05, "loss": 0.9213, "step": 2161 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9649162509086447e-05, "loss": 0.7343, "step": 2162 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9648638636226895e-05, "loss": 0.9177, "step": 2163 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964811437952796e-05, "loss": 1.0352, "step": 2164 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964758973901051e-05, "loss": 0.8624, "step": 2165 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9647064714695415e-05, "loss": 0.9432, "step": 2166 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9646539306603557e-05, "loss": 0.9183, "step": 2167 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9646013514755844e-05, "loss": 0.848, "step": 2168 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9645487339173183e-05, "loss": 0.8315, "step": 2169 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9644960779876516e-05, "loss": 0.7381, "step": 2170 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9644433836886785e-05, "loss": 0.9121, "step": 2171 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9643906510224956e-05, "loss": 0.7623, "step": 2172 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9643378799912002e-05, "loss": 0.7041, "step": 2173 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964285070596892e-05, "loss": 0.9888, "step": 2174 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964232222841672e-05, "loss": 0.7437, "step": 2175 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9641793367276423e-05, "loss": 0.8106, "step": 2176 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964126412256907e-05, "loss": 0.8601, "step": 2177 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.964073449431571e-05, "loss": 0.81, "step": 2178 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9640204482537417e-05, "loss": 0.9414, "step": 2179 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9639674087255276e-05, "loss": 0.7676, "step": 2180 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9639143308490386e-05, "loss": 0.8549, "step": 2181 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9638612146263862e-05, "loss": 0.8619, "step": 2182 }, { "epoch": 0.39, "grad_norm": 0.0, "learning_rate": 1.9638080600596836e-05, "loss": 0.7188, "step": 2183 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9637548671510452e-05, "loss": 0.7555, "step": 2184 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.963701635902587e-05, "loss": 0.8002, "step": 2185 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.963648366316427e-05, "loss": 0.8705, "step": 2186 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9635950583946844e-05, "loss": 0.8624, "step": 2187 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9635417121394792e-05, "loss": 0.7521, "step": 2188 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9634883275529346e-05, "loss": 0.8195, "step": 2189 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9634349046371734e-05, "loss": 0.8554, "step": 2190 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9633814433943215e-05, "loss": 0.7658, "step": 2191 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9633279438265052e-05, "loss": 0.8331, "step": 2192 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9632744059358527e-05, "loss": 1.0413, "step": 2193 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.963220829724495e-05, "loss": 0.8613, "step": 2194 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.963167215194562e-05, "loss": 0.9747, "step": 2195 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9631135623481872e-05, "loss": 0.8656, "step": 2196 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.963059871187505e-05, "loss": 0.8058, "step": 2197 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9630061417146512e-05, "loss": 0.7079, "step": 2198 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9629523739317636e-05, "loss": 0.8834, "step": 2199 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962898567840981e-05, "loss": 0.9026, "step": 2200 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9628447234444437e-05, "loss": 0.7503, "step": 2201 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962790840744294e-05, "loss": 0.845, "step": 2202 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962736919742675e-05, "loss": 0.7414, "step": 2203 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962682960441732e-05, "loss": 0.9917, "step": 2204 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962628962843612e-05, "loss": 0.8373, "step": 2205 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9625749269504628e-05, "loss": 0.7816, "step": 2206 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962520852764434e-05, "loss": 0.9498, "step": 2207 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9624667402876766e-05, "loss": 0.9972, "step": 2208 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9624125895223436e-05, "loss": 0.8902, "step": 2209 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962358400470589e-05, "loss": 0.8394, "step": 2210 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9623041731345686e-05, "loss": 0.8344, "step": 2211 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9622499075164397e-05, "loss": 0.6926, "step": 2212 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962195603618361e-05, "loss": 0.8763, "step": 2213 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9621412614424935e-05, "loss": 0.8902, "step": 2214 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9620868809909976e-05, "loss": 0.8986, "step": 2215 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.962032462266038e-05, "loss": 0.9759, "step": 2216 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9619780052697782e-05, "loss": 0.9059, "step": 2217 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.961923510004386e-05, "loss": 0.6794, "step": 2218 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9618689764720287e-05, "loss": 1.0087, "step": 2219 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9618144046748756e-05, "loss": 0.7939, "step": 2220 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9617597946150976e-05, "loss": 0.7864, "step": 2221 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9617051462948678e-05, "loss": 0.9144, "step": 2222 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9616504597163593e-05, "loss": 0.8857, "step": 2223 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9615957348817484e-05, "loss": 0.7506, "step": 2224 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9615409717932122e-05, "loss": 0.8496, "step": 2225 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9614861704529284e-05, "loss": 0.7423, "step": 2226 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9614313308630778e-05, "loss": 0.8181, "step": 2227 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.961376453025842e-05, "loss": 0.8891, "step": 2228 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9613215369434038e-05, "loss": 0.8743, "step": 2229 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.961266582617948e-05, "loss": 0.9384, "step": 2230 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.961211590051661e-05, "loss": 0.7524, "step": 2231 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.96115655924673e-05, "loss": 0.7478, "step": 2232 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9611014902053447e-05, "loss": 0.9764, "step": 2233 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9610463829296955e-05, "loss": 0.8263, "step": 2234 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9609912374219752e-05, "loss": 0.9645, "step": 2235 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.960936053684377e-05, "loss": 0.8335, "step": 2236 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.960880831719096e-05, "loss": 0.9711, "step": 2237 }, { "epoch": 0.4, "grad_norm": 0.0, "learning_rate": 1.9608255715283298e-05, "loss": 0.8843, "step": 2238 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9607702731142764e-05, "loss": 1.0134, "step": 2239 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9607149364791356e-05, "loss": 0.963, "step": 2240 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.960659561625109e-05, "loss": 0.9782, "step": 2241 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9606041485543992e-05, "loss": 0.8927, "step": 2242 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9605486972692107e-05, "loss": 0.8386, "step": 2243 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.96049320777175e-05, "loss": 0.7486, "step": 2244 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9604376800642233e-05, "loss": 1.0603, "step": 2245 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9603821141488408e-05, "loss": 0.788, "step": 2246 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9603265100278123e-05, "loss": 0.8826, "step": 2247 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9602708677033506e-05, "loss": 0.7943, "step": 2248 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9602151871776683e-05, "loss": 0.9453, "step": 2249 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.960159468452981e-05, "loss": 0.8878, "step": 2250 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.960103711531505e-05, "loss": 1.0535, "step": 2251 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.960047916415459e-05, "loss": 0.7485, "step": 2252 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.959992083107062e-05, "loss": 0.7899, "step": 2253 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9599362116085357e-05, "loss": 0.753, "step": 2254 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9598803019221022e-05, "loss": 0.8502, "step": 2255 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9598243540499858e-05, "loss": 0.8105, "step": 2256 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9597683679944125e-05, "loss": 0.9627, "step": 2257 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9597123437576094e-05, "loss": 0.798, "step": 2258 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.959656281341805e-05, "loss": 0.827, "step": 2259 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.95960018074923e-05, "loss": 0.9703, "step": 2260 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9595440419821155e-05, "loss": 0.9001, "step": 2261 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9594878650426955e-05, "loss": 0.7829, "step": 2262 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.959431649933205e-05, "loss": 0.8516, "step": 2263 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.959375396655879e-05, "loss": 0.9298, "step": 2264 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9593191052129565e-05, "loss": 0.8133, "step": 2265 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9592627756066765e-05, "loss": 0.8465, "step": 2266 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9592064078392804e-05, "loss": 0.7967, "step": 2267 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.95915000191301e-05, "loss": 0.8318, "step": 2268 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9590935578301094e-05, "loss": 0.79, "step": 2269 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.959037075592824e-05, "loss": 0.9718, "step": 2270 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9589805552034008e-05, "loss": 0.9483, "step": 2271 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9589239966640883e-05, "loss": 0.9309, "step": 2272 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9588673999771365e-05, "loss": 0.9719, "step": 2273 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9588107651447967e-05, "loss": 0.8035, "step": 2274 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9587540921693224e-05, "loss": 0.8166, "step": 2275 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.958697381052968e-05, "loss": 0.7706, "step": 2276 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.958640631797989e-05, "loss": 0.8347, "step": 2277 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9585838444066437e-05, "loss": 0.7687, "step": 2278 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9585270188811913e-05, "loss": 0.8432, "step": 2279 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9584701552238914e-05, "loss": 1.0615, "step": 2280 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.958413253437007e-05, "loss": 0.9125, "step": 2281 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9583563135228013e-05, "loss": 0.8544, "step": 2282 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.95829933548354e-05, "loss": 0.8087, "step": 2283 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9582423193214897e-05, "loss": 0.7701, "step": 2284 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9581852650389178e-05, "loss": 0.8854, "step": 2285 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.958128172638095e-05, "loss": 0.8124, "step": 2286 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9580710421212918e-05, "loss": 0.9228, "step": 2287 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9580138734907818e-05, "loss": 0.7416, "step": 2288 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9579566667488385e-05, "loss": 0.7831, "step": 2289 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9578994218977377e-05, "loss": 0.9497, "step": 2290 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.957842138939757e-05, "loss": 0.8816, "step": 2291 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9577848178771754e-05, "loss": 0.9324, "step": 2292 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 1.9577274587122726e-05, "loss": 0.7462, "step": 2293 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.957670061447331e-05, "loss": 1.0069, "step": 2294 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.957612626084634e-05, "loss": 0.938, "step": 2295 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.957555152626466e-05, "loss": 0.9454, "step": 2296 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9574976410751137e-05, "loss": 1.1262, "step": 2297 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9574400914328648e-05, "loss": 0.7147, "step": 2298 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9573825037020087e-05, "loss": 0.8097, "step": 2299 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.957324877884837e-05, "loss": 0.9443, "step": 2300 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9572672139836415e-05, "loss": 0.7972, "step": 2301 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9572095120007165e-05, "loss": 0.7656, "step": 2302 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.957151771938357e-05, "loss": 0.9097, "step": 2303 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9570939937988605e-05, "loss": 0.8305, "step": 2304 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9570361775845254e-05, "loss": 0.7924, "step": 2305 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9569783232976514e-05, "loss": 0.8796, "step": 2306 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9569204309405404e-05, "loss": 0.6832, "step": 2307 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9568625005154954e-05, "loss": 0.7361, "step": 2308 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9568045320248212e-05, "loss": 0.8578, "step": 2309 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9567465254708235e-05, "loss": 0.8448, "step": 2310 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.95668848085581e-05, "loss": 0.8134, "step": 2311 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9566303981820894e-05, "loss": 0.8701, "step": 2312 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9565722774519732e-05, "loss": 0.8393, "step": 2313 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.956514118667773e-05, "loss": 0.803, "step": 2314 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9564559218318026e-05, "loss": 0.9357, "step": 2315 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9563976869463768e-05, "loss": 0.7837, "step": 2316 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9563394140138128e-05, "loss": 0.8906, "step": 2317 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.956281103036429e-05, "loss": 0.9181, "step": 2318 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9562227540165444e-05, "loss": 0.8399, "step": 2319 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9561643669564803e-05, "loss": 0.8283, "step": 2320 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9561059418585603e-05, "loss": 0.8429, "step": 2321 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9560474787251075e-05, "loss": 0.7448, "step": 2322 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9559889775584484e-05, "loss": 0.9133, "step": 2323 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.95593043836091e-05, "loss": 0.8359, "step": 2324 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9558718611348214e-05, "loss": 0.8549, "step": 2325 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9558132458825123e-05, "loss": 0.8499, "step": 2326 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.955754592606315e-05, "loss": 0.9033, "step": 2327 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.955695901308563e-05, "loss": 0.8123, "step": 2328 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9556371719915906e-05, "loss": 0.9028, "step": 2329 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9555784046577345e-05, "loss": 0.8481, "step": 2330 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.955519599309332e-05, "loss": 0.9821, "step": 2331 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9554607559487237e-05, "loss": 0.9398, "step": 2332 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9554018745782493e-05, "loss": 0.9463, "step": 2333 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.955342955200252e-05, "loss": 0.8789, "step": 2334 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9552839978170748e-05, "loss": 0.8493, "step": 2335 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.955225002431064e-05, "loss": 0.7609, "step": 2336 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9551659690445662e-05, "loss": 0.9319, "step": 2337 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9551068976599294e-05, "loss": 0.797, "step": 2338 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9550477882795043e-05, "loss": 0.8371, "step": 2339 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9549886409056428e-05, "loss": 0.8291, "step": 2340 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9549294555406964e-05, "loss": 0.8871, "step": 2341 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9548702321870206e-05, "loss": 0.707, "step": 2342 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9548109708469712e-05, "loss": 0.7399, "step": 2343 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9547516715229053e-05, "loss": 0.8777, "step": 2344 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9546923342171828e-05, "loss": 0.8712, "step": 2345 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9546329589321637e-05, "loss": 0.9247, "step": 2346 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.95457354567021e-05, "loss": 0.9124, "step": 2347 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.9545140944336856e-05, "loss": 0.7325, "step": 2348 }, { "epoch": 0.42, "grad_norm": 0.0, "learning_rate": 1.954454605224955e-05, "loss": 0.7786, "step": 2349 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9543950780463853e-05, "loss": 0.7971, "step": 2350 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9543355129003447e-05, "loss": 0.9211, "step": 2351 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.954275909789202e-05, "loss": 0.7542, "step": 2352 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9542162687153293e-05, "loss": 0.6976, "step": 2353 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9541565896810987e-05, "loss": 0.8287, "step": 2354 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.954096872688884e-05, "loss": 0.9456, "step": 2355 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9540371177410615e-05, "loss": 0.7703, "step": 2356 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953977324840008e-05, "loss": 0.8862, "step": 2357 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9539174939881026e-05, "loss": 0.6551, "step": 2358 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953857625187725e-05, "loss": 0.888, "step": 2359 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953797718441257e-05, "loss": 0.8184, "step": 2360 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9537377737510815e-05, "loss": 0.8739, "step": 2361 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953677791119584e-05, "loss": 0.7925, "step": 2362 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.95361777054915e-05, "loss": 0.9048, "step": 2363 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9535577120421675e-05, "loss": 0.77, "step": 2364 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9534976156010255e-05, "loss": 0.9062, "step": 2365 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953437481228115e-05, "loss": 0.8045, "step": 2366 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9533773089258284e-05, "loss": 0.9701, "step": 2367 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953317098696559e-05, "loss": 0.8645, "step": 2368 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9532568505427022e-05, "loss": 0.912, "step": 2369 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953196564466655e-05, "loss": 0.7007, "step": 2370 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9531362404708155e-05, "loss": 0.9802, "step": 2371 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953075878557584e-05, "loss": 0.9852, "step": 2372 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.953015478729361e-05, "loss": 0.924, "step": 2373 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9529550409885494e-05, "loss": 0.9644, "step": 2374 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.952894565337554e-05, "loss": 0.8521, "step": 2375 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9528340517787802e-05, "loss": 0.9879, "step": 2376 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9527735003146357e-05, "loss": 0.8097, "step": 2377 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9527129109475292e-05, "loss": 0.8959, "step": 2378 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.952652283679871e-05, "loss": 0.7755, "step": 2379 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.952591618514073e-05, "loss": 0.9185, "step": 2380 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9525309154525483e-05, "loss": 0.8623, "step": 2381 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9524701744977124e-05, "loss": 0.8642, "step": 2382 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.952409395651981e-05, "loss": 0.8027, "step": 2383 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9523485789177723e-05, "loss": 0.9488, "step": 2384 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.952287724297506e-05, "loss": 0.9691, "step": 2385 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9522268317936023e-05, "loss": 0.8053, "step": 2386 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9521659014084838e-05, "loss": 0.8095, "step": 2387 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.952104933144575e-05, "loss": 0.7449, "step": 2388 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9520439270043005e-05, "loss": 0.9394, "step": 2389 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9519828829900878e-05, "loss": 0.7661, "step": 2390 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9519218011043653e-05, "loss": 0.9767, "step": 2391 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951860681349563e-05, "loss": 0.8284, "step": 2392 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951799523728112e-05, "loss": 0.911, "step": 2393 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951738328242445e-05, "loss": 0.8922, "step": 2394 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9516770948949975e-05, "loss": 0.7283, "step": 2395 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9516158236882047e-05, "loss": 0.834, "step": 2396 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9515545146245042e-05, "loss": 1.0293, "step": 2397 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951493167706335e-05, "loss": 0.942, "step": 2398 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9514317829361374e-05, "loss": 0.7342, "step": 2399 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9513703603163536e-05, "loss": 0.7824, "step": 2400 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951308899849427e-05, "loss": 0.7623, "step": 2401 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951247401537803e-05, "loss": 0.9052, "step": 2402 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.951185865383928e-05, "loss": 0.819, "step": 2403 }, { "epoch": 0.43, "grad_norm": 0.0, "learning_rate": 1.9511242913902494e-05, "loss": 0.8728, "step": 2404 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.951062679559217e-05, "loss": 0.8559, "step": 2405 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9510010298932823e-05, "loss": 0.8041, "step": 2406 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9509393423948973e-05, "loss": 0.9091, "step": 2407 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950877617066516e-05, "loss": 0.8499, "step": 2408 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9508158539105943e-05, "loss": 0.8112, "step": 2409 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9507540529295895e-05, "loss": 1.0035, "step": 2410 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9506922141259595e-05, "loss": 0.8774, "step": 2411 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9506303375021643e-05, "loss": 0.897, "step": 2412 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950568423060666e-05, "loss": 0.8926, "step": 2413 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9505064708039274e-05, "loss": 0.9005, "step": 2414 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950444480734413e-05, "loss": 0.7183, "step": 2415 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950382452854589e-05, "loss": 0.8084, "step": 2416 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950320387166923e-05, "loss": 0.8985, "step": 2417 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950258283673884e-05, "loss": 0.9343, "step": 2418 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9501961423779424e-05, "loss": 0.834, "step": 2419 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.950133963281571e-05, "loss": 0.8447, "step": 2420 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9500717463872424e-05, "loss": 0.852, "step": 2421 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9500094916974318e-05, "loss": 0.9199, "step": 2422 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9499471992146167e-05, "loss": 0.7988, "step": 2423 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.949884868941275e-05, "loss": 0.8861, "step": 2424 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9498225008798854e-05, "loss": 0.8255, "step": 2425 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9497600950329297e-05, "loss": 0.7765, "step": 2426 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9496976514028904e-05, "loss": 0.8847, "step": 2427 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.949635169992252e-05, "loss": 0.8374, "step": 2428 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.949572650803499e-05, "loss": 0.882, "step": 2429 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9495100938391196e-05, "loss": 0.8279, "step": 2430 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9494474991016022e-05, "loss": 0.8511, "step": 2431 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9493848665934367e-05, "loss": 0.9433, "step": 2432 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9493221963171148e-05, "loss": 0.8633, "step": 2433 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9492594882751294e-05, "loss": 0.7964, "step": 2434 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9491967424699757e-05, "loss": 0.8331, "step": 2435 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9491339589041495e-05, "loss": 0.692, "step": 2436 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9490711375801484e-05, "loss": 0.8227, "step": 2437 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9490082785004713e-05, "loss": 0.6725, "step": 2438 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9489453816676196e-05, "loss": 0.818, "step": 2439 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948882447084095e-05, "loss": 0.9137, "step": 2440 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948819474752401e-05, "loss": 0.7842, "step": 2441 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9487564646750426e-05, "loss": 0.7132, "step": 2442 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948693416854527e-05, "loss": 0.9524, "step": 2443 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948630331293362e-05, "loss": 0.7859, "step": 2444 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9485672079940573e-05, "loss": 0.8084, "step": 2445 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948504046959124e-05, "loss": 0.8813, "step": 2446 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948440848191075e-05, "loss": 0.6709, "step": 2447 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9483776116924244e-05, "loss": 0.9343, "step": 2448 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9483143374656876e-05, "loss": 0.8108, "step": 2449 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948251025513382e-05, "loss": 0.8663, "step": 2450 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.948187675838026e-05, "loss": 0.8927, "step": 2451 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9481242884421402e-05, "loss": 0.9324, "step": 2452 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9480608633282455e-05, "loss": 0.6516, "step": 2453 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9479974004988657e-05, "loss": 0.8211, "step": 2454 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9479338999565257e-05, "loss": 0.7732, "step": 2455 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.947870361703751e-05, "loss": 0.938, "step": 2456 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9478067857430692e-05, "loss": 0.917, "step": 2457 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9477431720770102e-05, "loss": 0.9539, "step": 2458 }, { "epoch": 0.44, "grad_norm": 0.0, "learning_rate": 1.9476795207081042e-05, "loss": 0.8899, "step": 2459 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9476158316388832e-05, "loss": 0.8428, "step": 2460 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9475521048718812e-05, "loss": 0.8063, "step": 2461 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.947488340409633e-05, "loss": 0.8344, "step": 2462 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9474245382546758e-05, "loss": 0.733, "step": 2463 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9473606984095474e-05, "loss": 0.9701, "step": 2464 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9472968208767872e-05, "loss": 0.8309, "step": 2465 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.947232905658937e-05, "loss": 0.7651, "step": 2466 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.947168952758539e-05, "loss": 0.9879, "step": 2467 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9471049621781372e-05, "loss": 0.8572, "step": 2468 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.947040933920278e-05, "loss": 0.8649, "step": 2469 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9469768679875078e-05, "loss": 0.7744, "step": 2470 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9469127643823757e-05, "loss": 0.9369, "step": 2471 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.946848623107431e-05, "loss": 0.7621, "step": 2472 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.946784444165227e-05, "loss": 0.8953, "step": 2473 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9467202275583153e-05, "loss": 0.8846, "step": 2474 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9466559732892515e-05, "loss": 0.907, "step": 2475 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.946591681360591e-05, "loss": 0.9736, "step": 2476 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.946527351774892e-05, "loss": 0.8517, "step": 2477 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9464629845347134e-05, "loss": 1.0079, "step": 2478 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9463985796426162e-05, "loss": 0.8808, "step": 2479 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9463341371011618e-05, "loss": 0.9727, "step": 2480 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9462696569129143e-05, "loss": 0.8109, "step": 2481 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.946205139080439e-05, "loss": 0.8185, "step": 2482 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9461405836063024e-05, "loss": 0.7952, "step": 2483 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9460759904930722e-05, "loss": 0.844, "step": 2484 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9460113597433187e-05, "loss": 0.9042, "step": 2485 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9459466913596126e-05, "loss": 0.8363, "step": 2486 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9458819853445264e-05, "loss": 0.77, "step": 2487 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9458172417006347e-05, "loss": 0.8511, "step": 2488 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.945752460430513e-05, "loss": 0.8211, "step": 2489 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.945687641536738e-05, "loss": 0.9668, "step": 2490 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9456227850218888e-05, "loss": 0.7095, "step": 2491 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9455578908885448e-05, "loss": 0.8061, "step": 2492 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9454929591392883e-05, "loss": 0.8531, "step": 2493 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9454279897767026e-05, "loss": 0.8756, "step": 2494 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9453629828033713e-05, "loss": 0.7397, "step": 2495 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9452979382218814e-05, "loss": 0.7823, "step": 2496 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.94523285603482e-05, "loss": 0.8166, "step": 2497 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9451677362447762e-05, "loss": 0.976, "step": 2498 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9451025788543404e-05, "loss": 0.7906, "step": 2499 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9450373838661055e-05, "loss": 0.9528, "step": 2500 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9449721512826644e-05, "loss": 0.7631, "step": 2501 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.944906881106612e-05, "loss": 0.9365, "step": 2502 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9448415733405455e-05, "loss": 0.9868, "step": 2503 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.944776227987063e-05, "loss": 0.8466, "step": 2504 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.944710845048763e-05, "loss": 0.8199, "step": 2505 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9446454245282474e-05, "loss": 0.8332, "step": 2506 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9445799664281184e-05, "loss": 0.967, "step": 2507 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9445144707509804e-05, "loss": 0.8414, "step": 2508 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9444489374994388e-05, "loss": 0.8844, "step": 2509 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9443833666761e-05, "loss": 0.8664, "step": 2510 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.944317758283574e-05, "loss": 0.8412, "step": 2511 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.944252112324469e-05, "loss": 0.8391, "step": 2512 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9441864288013973e-05, "loss": 0.8177, "step": 2513 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 1.9441207077169725e-05, "loss": 0.804, "step": 2514 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9440549490738084e-05, "loss": 0.7705, "step": 2515 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9439891528745215e-05, "loss": 0.8765, "step": 2516 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9439233191217283e-05, "loss": 0.8099, "step": 2517 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.943857447818049e-05, "loss": 0.915, "step": 2518 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.943791538966103e-05, "loss": 0.8333, "step": 2519 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.943725592568513e-05, "loss": 0.8506, "step": 2520 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.943659608627903e-05, "loss": 0.767, "step": 2521 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.943593587146896e-05, "loss": 1.0326, "step": 2522 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9435275281281202e-05, "loss": 0.7764, "step": 2523 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9434614315742028e-05, "loss": 0.9623, "step": 2524 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9433952974877733e-05, "loss": 0.702, "step": 2525 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9433291258714634e-05, "loss": 0.9209, "step": 2526 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9432629167279043e-05, "loss": 0.6851, "step": 2527 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9431966700597305e-05, "loss": 0.8005, "step": 2528 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9431303858695777e-05, "loss": 0.8528, "step": 2529 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.943064064160082e-05, "loss": 0.7212, "step": 2530 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9429977049338825e-05, "loss": 0.917, "step": 2531 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.942931308193619e-05, "loss": 0.7848, "step": 2532 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9428648739419326e-05, "loss": 0.8615, "step": 2533 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9427984021814663e-05, "loss": 0.8918, "step": 2534 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9427318929148647e-05, "loss": 1.0118, "step": 2535 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.942665346144773e-05, "loss": 0.761, "step": 2536 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9425987618738392e-05, "loss": 0.8268, "step": 2537 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9425321401047118e-05, "loss": 0.8216, "step": 2538 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9424654808400413e-05, "loss": 0.833, "step": 2539 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9423987840824792e-05, "loss": 0.9418, "step": 2540 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9423320498346792e-05, "loss": 0.9223, "step": 2541 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.942265278099296e-05, "loss": 0.7939, "step": 2542 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.942198468878986e-05, "loss": 0.8244, "step": 2543 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9421316221764065e-05, "loss": 0.721, "step": 2544 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9420647379942172e-05, "loss": 0.9095, "step": 2545 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.941997816335079e-05, "loss": 0.8858, "step": 2546 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.941930857201654e-05, "loss": 0.9018, "step": 2547 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9418638605966054e-05, "loss": 0.9991, "step": 2548 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.941796826522599e-05, "loss": 0.7551, "step": 2549 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9417297549823018e-05, "loss": 0.9327, "step": 2550 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9416626459783816e-05, "loss": 0.8225, "step": 2551 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.941595499513508e-05, "loss": 0.9098, "step": 2552 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9415283155903526e-05, "loss": 0.7765, "step": 2553 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9414610942115878e-05, "loss": 0.8178, "step": 2554 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9413938353798877e-05, "loss": 0.919, "step": 2555 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9413265390979282e-05, "loss": 0.7485, "step": 2556 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9412592053683867e-05, "loss": 0.7976, "step": 2557 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.941191834193941e-05, "loss": 0.8676, "step": 2558 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.941124425577272e-05, "loss": 0.7993, "step": 2559 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9410569795210613e-05, "loss": 0.8788, "step": 2560 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.940989496027992e-05, "loss": 0.8639, "step": 2561 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.940921975100748e-05, "loss": 0.8748, "step": 2562 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.940854416742016e-05, "loss": 0.8971, "step": 2563 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.940786820954484e-05, "loss": 0.8169, "step": 2564 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9407191877408404e-05, "loss": 0.7642, "step": 2565 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9406515171037757e-05, "loss": 0.8803, "step": 2566 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9405838090459826e-05, "loss": 0.8674, "step": 2567 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.940516063570154e-05, "loss": 0.8919, "step": 2568 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9404482806789854e-05, "loss": 0.7601, "step": 2569 }, { "epoch": 0.46, "grad_norm": 0.0, "learning_rate": 1.9403804603751732e-05, "loss": 1.0179, "step": 2570 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.940312602661415e-05, "loss": 0.8877, "step": 2571 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9402447075404107e-05, "loss": 0.8336, "step": 2572 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9401767750148615e-05, "loss": 0.7378, "step": 2573 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9401088050874693e-05, "loss": 0.8871, "step": 2574 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9400407977609382e-05, "loss": 0.8923, "step": 2575 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.939972753037974e-05, "loss": 0.9982, "step": 2576 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9399046709212834e-05, "loss": 0.9328, "step": 2577 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.939836551413575e-05, "loss": 0.7798, "step": 2578 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9397683945175584e-05, "loss": 0.747, "step": 2579 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.939700200235945e-05, "loss": 0.8173, "step": 2580 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.939631968571448e-05, "loss": 0.987, "step": 2581 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9395636995267816e-05, "loss": 0.8016, "step": 2582 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.939495393104662e-05, "loss": 0.9538, "step": 2583 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9394270493078058e-05, "loss": 0.8338, "step": 2584 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9393586681389324e-05, "loss": 1.0286, "step": 2585 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9392902496007617e-05, "loss": 0.7456, "step": 2586 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9392217936960162e-05, "loss": 0.8481, "step": 2587 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9391533004274185e-05, "loss": 0.7069, "step": 2588 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9390847697976937e-05, "loss": 0.982, "step": 2589 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.939016201809568e-05, "loss": 0.8596, "step": 2590 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.938947596465769e-05, "loss": 0.7368, "step": 2591 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9388789537690263e-05, "loss": 0.8985, "step": 2592 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9388102737220706e-05, "loss": 0.8459, "step": 2593 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.938741556327634e-05, "loss": 0.7334, "step": 2594 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9386728015884495e-05, "loss": 0.7816, "step": 2595 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9386040095072533e-05, "loss": 0.7761, "step": 2596 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.938535180086782e-05, "loss": 0.8646, "step": 2597 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.938466313329773e-05, "loss": 0.8776, "step": 2598 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9383974092389666e-05, "loss": 0.9, "step": 2599 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9383284678171035e-05, "loss": 0.8554, "step": 2600 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9382594890669266e-05, "loss": 0.783, "step": 2601 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9381904729911797e-05, "loss": 0.656, "step": 2602 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.938121419592609e-05, "loss": 0.8188, "step": 2603 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.938052328873961e-05, "loss": 0.8345, "step": 2604 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.937983200837984e-05, "loss": 0.8948, "step": 2605 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9379140354874287e-05, "loss": 0.7989, "step": 2606 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9378448328250467e-05, "loss": 1.0082, "step": 2607 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.93777559285359e-05, "loss": 0.7808, "step": 2608 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.937706315575814e-05, "loss": 0.9087, "step": 2609 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9376370009944747e-05, "loss": 0.8635, "step": 2610 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.937567649112329e-05, "loss": 0.7633, "step": 2611 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9374982599321358e-05, "loss": 0.841, "step": 2612 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.937428833456656e-05, "loss": 0.8289, "step": 2613 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9373593696886516e-05, "loss": 0.8349, "step": 2614 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9372898686308855e-05, "loss": 0.8461, "step": 2615 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9372203302861227e-05, "loss": 0.7826, "step": 2616 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9371507546571294e-05, "loss": 0.8948, "step": 2617 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.937081141746674e-05, "loss": 0.7982, "step": 2618 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.937011491557525e-05, "loss": 0.709, "step": 2619 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9369418040924543e-05, "loss": 0.732, "step": 2620 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.936872079354233e-05, "loss": 0.8747, "step": 2621 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9368023173456357e-05, "loss": 0.8801, "step": 2622 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9367325180694373e-05, "loss": 0.8062, "step": 2623 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.9366626815284146e-05, "loss": 0.9617, "step": 2624 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 1.936592807725346e-05, "loss": 0.7656, "step": 2625 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.936522896663011e-05, "loss": 0.9895, "step": 2626 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9364529483441907e-05, "loss": 0.8897, "step": 2627 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9363829627716678e-05, "loss": 0.91, "step": 2628 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9363129399482266e-05, "loss": 0.8154, "step": 2629 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.936242879876653e-05, "loss": 0.9551, "step": 2630 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9361727825597334e-05, "loss": 0.8062, "step": 2631 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9361026480002567e-05, "loss": 0.9668, "step": 2632 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9360324762010135e-05, "loss": 0.7591, "step": 2633 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9359622671647945e-05, "loss": 0.8619, "step": 2634 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.935892020894393e-05, "loss": 0.8708, "step": 2635 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.935821737392604e-05, "loss": 0.6949, "step": 2636 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.935751416662223e-05, "loss": 0.726, "step": 2637 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9356810587060475e-05, "loss": 0.9284, "step": 2638 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9356106635268767e-05, "loss": 0.6657, "step": 2639 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9355402311275108e-05, "loss": 0.7906, "step": 2640 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9354697615107518e-05, "loss": 0.769, "step": 2641 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9353992546794034e-05, "loss": 0.7894, "step": 2642 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.93532871063627e-05, "loss": 0.7132, "step": 2643 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.935258129384158e-05, "loss": 0.8079, "step": 2644 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.935187510925876e-05, "loss": 0.6602, "step": 2645 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9351168552642324e-05, "loss": 0.7869, "step": 2646 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9350461624020384e-05, "loss": 0.8553, "step": 2647 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.934975432342106e-05, "loss": 0.7063, "step": 2648 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9349046650872493e-05, "loss": 0.8071, "step": 2649 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9348338606402837e-05, "loss": 0.8375, "step": 2650 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9347630190040254e-05, "loss": 0.7763, "step": 2651 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.934692140181293e-05, "loss": 0.9401, "step": 2652 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9346212241749062e-05, "loss": 0.9025, "step": 2653 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9345502709876858e-05, "loss": 0.8395, "step": 2654 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9344792806224546e-05, "loss": 0.9742, "step": 2655 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9344082530820367e-05, "loss": 0.9068, "step": 2656 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.934337188369258e-05, "loss": 0.8197, "step": 2657 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9342660864869453e-05, "loss": 0.9121, "step": 2658 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.934194947437927e-05, "loss": 0.8067, "step": 2659 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9341237712250336e-05, "loss": 0.8339, "step": 2660 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9340525578510964e-05, "loss": 0.8524, "step": 2661 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.933981307318948e-05, "loss": 0.8214, "step": 2662 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9339100196314233e-05, "loss": 0.808, "step": 2663 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.933838694791358e-05, "loss": 0.8696, "step": 2664 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.93376733280159e-05, "loss": 1.0304, "step": 2665 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9336959336649578e-05, "loss": 0.7391, "step": 2666 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9336244973843015e-05, "loss": 0.81, "step": 2667 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.933553023962464e-05, "loss": 0.995, "step": 2668 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9334815134022873e-05, "loss": 0.9251, "step": 2669 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.933409965706617e-05, "loss": 0.8755, "step": 2670 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9333383808782993e-05, "loss": 0.964, "step": 2671 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.933266758920182e-05, "loss": 0.8003, "step": 2672 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9331950998351142e-05, "loss": 0.8715, "step": 2673 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9331234036259466e-05, "loss": 0.8642, "step": 2674 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9330516702955314e-05, "loss": 0.9203, "step": 2675 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9329798998467226e-05, "loss": 0.8959, "step": 2676 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9329080922823747e-05, "loss": 0.8347, "step": 2677 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9328362476053454e-05, "loss": 0.9184, "step": 2678 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9327643658184917e-05, "loss": 0.8648, "step": 2679 }, { "epoch": 0.48, "grad_norm": 0.0, "learning_rate": 1.9326924469246734e-05, "loss": 0.8169, "step": 2680 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.932620490926752e-05, "loss": 0.9798, "step": 2681 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9325484978275897e-05, "loss": 0.9103, "step": 2682 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.932476467630051e-05, "loss": 0.8248, "step": 2683 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9324044003370006e-05, "loss": 0.9337, "step": 2684 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.932332295951306e-05, "loss": 0.8947, "step": 2685 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9322601544758357e-05, "loss": 0.8136, "step": 2686 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9321879759134588e-05, "loss": 0.762, "step": 2687 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.932115760267048e-05, "loss": 0.8306, "step": 2688 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9320435075394753e-05, "loss": 0.9214, "step": 2689 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9319712177336146e-05, "loss": 0.8285, "step": 2690 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.931898890852343e-05, "loss": 0.7551, "step": 2691 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9318265268985367e-05, "loss": 0.8054, "step": 2692 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9317541258750752e-05, "loss": 0.8454, "step": 2693 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.931681687784838e-05, "loss": 0.8267, "step": 2694 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9316092126307074e-05, "loss": 0.9464, "step": 2695 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9315367004155663e-05, "loss": 0.7209, "step": 2696 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9314641511422995e-05, "loss": 0.8838, "step": 2697 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9313915648137928e-05, "loss": 0.8083, "step": 2698 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9313189414329344e-05, "loss": 0.7873, "step": 2699 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.931246281002613e-05, "loss": 0.9316, "step": 2700 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9311735835257196e-05, "loss": 0.9433, "step": 2701 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9311008490051454e-05, "loss": 0.942, "step": 2702 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9310280774437845e-05, "loss": 0.8293, "step": 2703 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9309552688445317e-05, "loss": 0.8762, "step": 2704 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9308824232102833e-05, "loss": 0.8317, "step": 2705 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9308095405439376e-05, "loss": 0.8689, "step": 2706 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.930736620848394e-05, "loss": 0.7221, "step": 2707 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.930663664126553e-05, "loss": 0.963, "step": 2708 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.930590670381317e-05, "loss": 0.8405, "step": 2709 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9305176396155904e-05, "loss": 0.8434, "step": 2710 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9304445718322778e-05, "loss": 0.8815, "step": 2711 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.930371467034286e-05, "loss": 0.7714, "step": 2712 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9302983252245234e-05, "loss": 0.8307, "step": 2713 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9302251464059e-05, "loss": 0.8489, "step": 2714 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9301519305813267e-05, "loss": 0.8532, "step": 2715 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.930078677753716e-05, "loss": 0.7802, "step": 2716 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.930005387925982e-05, "loss": 0.7435, "step": 2717 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.929932061101041e-05, "loss": 0.8777, "step": 2718 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9298586972818092e-05, "loss": 1.0305, "step": 2719 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9297852964712057e-05, "loss": 0.9531, "step": 2720 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.92971185867215e-05, "loss": 0.8614, "step": 2721 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9296383838875638e-05, "loss": 0.7511, "step": 2722 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9295648721203706e-05, "loss": 0.7313, "step": 2723 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.929491323373494e-05, "loss": 0.88, "step": 2724 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9294177376498603e-05, "loss": 0.855, "step": 2725 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.929344114952397e-05, "loss": 0.9279, "step": 2726 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9292704552840326e-05, "loss": 0.8493, "step": 2727 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9291967586476977e-05, "loss": 0.8947, "step": 2728 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.929123025046324e-05, "loss": 0.8431, "step": 2729 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9290492544828443e-05, "loss": 0.9107, "step": 2730 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9289754469601937e-05, "loss": 0.7149, "step": 2731 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9289016024813086e-05, "loss": 0.8848, "step": 2732 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9288277210491265e-05, "loss": 0.773, "step": 2733 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.9287538026665866e-05, "loss": 0.804, "step": 2734 }, { "epoch": 0.49, "grad_norm": 0.0, "learning_rate": 1.928679847336629e-05, "loss": 0.7148, "step": 2735 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9286058550621967e-05, "loss": 0.939, "step": 2736 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9285318258462325e-05, "loss": 0.8059, "step": 2737 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9284577596916812e-05, "loss": 0.6737, "step": 2738 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.92838365660149e-05, "loss": 0.7224, "step": 2739 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9283095165786067e-05, "loss": 0.9931, "step": 2740 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9282353396259802e-05, "loss": 0.9193, "step": 2741 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.928161125746562e-05, "loss": 0.8711, "step": 2742 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.928086874943304e-05, "loss": 0.7933, "step": 2743 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9280125872191605e-05, "loss": 0.8307, "step": 2744 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9279382625770863e-05, "loss": 0.7674, "step": 2745 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9278639010200383e-05, "loss": 0.8139, "step": 2746 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.927789502550975e-05, "loss": 0.9856, "step": 2747 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9277150671728554e-05, "loss": 0.8144, "step": 2748 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9276405948886417e-05, "loss": 0.9395, "step": 2749 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.927566085701296e-05, "loss": 0.8024, "step": 2750 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9274915396137823e-05, "loss": 0.8199, "step": 2751 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9274169566290658e-05, "loss": 0.8292, "step": 2752 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9273423367501145e-05, "loss": 0.8935, "step": 2753 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9272676799798965e-05, "loss": 0.9094, "step": 2754 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9271929863213815e-05, "loss": 0.9685, "step": 2755 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.927118255777541e-05, "loss": 0.97, "step": 2756 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9270434883513485e-05, "loss": 0.9162, "step": 2757 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9269686840457774e-05, "loss": 0.9086, "step": 2758 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9268938428638043e-05, "loss": 0.8555, "step": 2759 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9268189648084064e-05, "loss": 0.808, "step": 2760 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.926744049882562e-05, "loss": 0.9488, "step": 2761 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.926669098089252e-05, "loss": 0.7169, "step": 2762 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.926594109431458e-05, "loss": 0.7585, "step": 2763 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9265190839121625e-05, "loss": 0.7839, "step": 2764 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9264440215343508e-05, "loss": 0.8161, "step": 2765 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.926368922301009e-05, "loss": 0.8569, "step": 2766 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9262937862151245e-05, "loss": 1.0771, "step": 2767 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.926218613279686e-05, "loss": 0.8724, "step": 2768 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9261434034976848e-05, "loss": 0.9466, "step": 2769 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.926068156872112e-05, "loss": 0.8688, "step": 2770 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9259928734059618e-05, "loss": 0.7231, "step": 2771 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9259175531022287e-05, "loss": 0.8419, "step": 2772 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9258421959639092e-05, "loss": 0.8232, "step": 2773 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.925766801994001e-05, "loss": 0.9775, "step": 2774 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9256913711955035e-05, "loss": 0.726, "step": 2775 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9256159035714173e-05, "loss": 0.8586, "step": 2776 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9255403991247452e-05, "loss": 0.8536, "step": 2777 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.92546485785849e-05, "loss": 0.8841, "step": 2778 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.925389279775658e-05, "loss": 0.9735, "step": 2779 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9253136648792545e-05, "loss": 0.7094, "step": 2780 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9252380131722884e-05, "loss": 0.7869, "step": 2781 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9251623246577695e-05, "loss": 0.8562, "step": 2782 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.925086599338708e-05, "loss": 0.772, "step": 2783 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.925010837218117e-05, "loss": 0.9524, "step": 2784 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9249350382990103e-05, "loss": 0.8902, "step": 2785 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9248592025844034e-05, "loss": 0.6702, "step": 2786 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.924783330077313e-05, "loss": 0.7512, "step": 2787 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9247074207807573e-05, "loss": 0.8278, "step": 2788 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.924631474697756e-05, "loss": 0.7995, "step": 2789 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9245554918313313e-05, "loss": 0.8147, "step": 2790 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.9244794721845048e-05, "loss": 0.8331, "step": 2791 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9244034157603016e-05, "loss": 0.7582, "step": 2792 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9243273225617466e-05, "loss": 0.9379, "step": 2793 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9242511925918675e-05, "loss": 0.819, "step": 2794 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9241750258536925e-05, "loss": 0.8788, "step": 2795 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9240988223502516e-05, "loss": 0.8113, "step": 2796 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9240225820845766e-05, "loss": 0.843, "step": 2797 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9239463050597005e-05, "loss": 0.8075, "step": 2798 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9238699912786573e-05, "loss": 0.7823, "step": 2799 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9237936407444836e-05, "loss": 0.984, "step": 2800 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.923717253460216e-05, "loss": 0.9607, "step": 2801 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9236408294288936e-05, "loss": 0.7866, "step": 2802 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.923564368653557e-05, "loss": 0.7824, "step": 2803 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9234878711372473e-05, "loss": 0.8162, "step": 2804 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.923411336883008e-05, "loss": 0.8504, "step": 2805 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9233347658938842e-05, "loss": 0.7744, "step": 2806 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.923258158172922e-05, "loss": 0.9644, "step": 2807 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9231815137231676e-05, "loss": 0.9443, "step": 2808 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9231048325476717e-05, "loss": 0.8197, "step": 2809 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9230281146494842e-05, "loss": 0.968, "step": 2810 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9229513600316568e-05, "loss": 0.8962, "step": 2811 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9228745686972435e-05, "loss": 0.7233, "step": 2812 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9227977406492992e-05, "loss": 0.9334, "step": 2813 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9227208758908794e-05, "loss": 0.8136, "step": 2814 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9226439744250425e-05, "loss": 0.9894, "step": 2815 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9225670362548478e-05, "loss": 0.9187, "step": 2816 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9224900613833558e-05, "loss": 0.7375, "step": 2817 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.922413049813629e-05, "loss": 0.8755, "step": 2818 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.922336001548731e-05, "loss": 0.9417, "step": 2819 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9222589165917265e-05, "loss": 0.7293, "step": 2820 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9221817949456828e-05, "loss": 0.8719, "step": 2821 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9221046366136673e-05, "loss": 0.8013, "step": 2822 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9220274415987498e-05, "loss": 1.0054, "step": 2823 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.921950209904001e-05, "loss": 0.8699, "step": 2824 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9218729415324935e-05, "loss": 0.8421, "step": 2825 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.921795636487301e-05, "loss": 0.7696, "step": 2826 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.921718294771499e-05, "loss": 0.7621, "step": 2827 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9216409163881643e-05, "loss": 0.9106, "step": 2828 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.921563501340375e-05, "loss": 0.8609, "step": 2829 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.921486049631211e-05, "loss": 0.9048, "step": 2830 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9214085612637537e-05, "loss": 0.7418, "step": 2831 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9213310362410848e-05, "loss": 0.8527, "step": 2832 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9212534745662893e-05, "loss": 0.8098, "step": 2833 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9211758762424523e-05, "loss": 0.6914, "step": 2834 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.921098241272661e-05, "loss": 0.8258, "step": 2835 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9210205696600036e-05, "loss": 0.9108, "step": 2836 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.92094286140757e-05, "loss": 0.835, "step": 2837 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.920865116518452e-05, "loss": 1.0038, "step": 2838 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9207873349957418e-05, "loss": 0.888, "step": 2839 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9207095168425343e-05, "loss": 0.8185, "step": 2840 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9206316620619248e-05, "loss": 0.9133, "step": 2841 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9205537706570106e-05, "loss": 0.7861, "step": 2842 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.920475842630891e-05, "loss": 1.0094, "step": 2843 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9203978779866647e-05, "loss": 0.7948, "step": 2844 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9203198767274343e-05, "loss": 0.8217, "step": 2845 }, { "epoch": 0.51, "grad_norm": 0.0, "learning_rate": 1.9202418388563026e-05, "loss": 0.8839, "step": 2846 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9201637643763743e-05, "loss": 0.9702, "step": 2847 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9200856532907554e-05, "loss": 0.8894, "step": 2848 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9200075056025525e-05, "loss": 0.9207, "step": 2849 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.919929321314875e-05, "loss": 0.8761, "step": 2850 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9198511004308334e-05, "loss": 0.7972, "step": 2851 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9197728429535392e-05, "loss": 0.7559, "step": 2852 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9196945488861053e-05, "loss": 0.7177, "step": 2853 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.919616218231647e-05, "loss": 0.8118, "step": 2854 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9195378509932803e-05, "loss": 0.8991, "step": 2855 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9194594471741225e-05, "loss": 0.8467, "step": 2856 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9193810067772925e-05, "loss": 0.9491, "step": 2857 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9193025298059113e-05, "loss": 0.8432, "step": 2858 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.919224016263101e-05, "loss": 0.9307, "step": 2859 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.919145466151984e-05, "loss": 0.7621, "step": 2860 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.919066879475686e-05, "loss": 0.7265, "step": 2861 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.918988256237333e-05, "loss": 0.8143, "step": 2862 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.918909596440053e-05, "loss": 0.7094, "step": 2863 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9188309000869752e-05, "loss": 0.8921, "step": 2864 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.91875216718123e-05, "loss": 0.8094, "step": 2865 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9186733977259497e-05, "loss": 0.9777, "step": 2866 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9185945917242677e-05, "loss": 0.7253, "step": 2867 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9185157491793194e-05, "loss": 0.6722, "step": 2868 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.918436870094241e-05, "loss": 0.8654, "step": 2869 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9183579544721708e-05, "loss": 0.7045, "step": 2870 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9182790023162473e-05, "loss": 0.9204, "step": 2871 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9182000136296123e-05, "loss": 0.909, "step": 2872 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9181209884154078e-05, "loss": 0.9424, "step": 2873 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9180419266767777e-05, "loss": 0.8252, "step": 2874 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.917962828416867e-05, "loss": 0.7453, "step": 2875 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9178836936388227e-05, "loss": 0.9066, "step": 2876 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.917804522345792e-05, "loss": 0.636, "step": 2877 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9177253145409257e-05, "loss": 0.9831, "step": 2878 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.917646070227374e-05, "loss": 0.7862, "step": 2879 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9175667894082896e-05, "loss": 0.8613, "step": 2880 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9174874720868268e-05, "loss": 0.8181, "step": 2881 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.91740811826614e-05, "loss": 0.9296, "step": 2882 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.917328727949387e-05, "loss": 0.9216, "step": 2883 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9172493011397256e-05, "loss": 0.7757, "step": 2884 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9171698378403156e-05, "loss": 0.9551, "step": 2885 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9170903380543183e-05, "loss": 0.8385, "step": 2886 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9170108017848966e-05, "loss": 0.8643, "step": 2887 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.916931229035214e-05, "loss": 0.7535, "step": 2888 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9168516198084365e-05, "loss": 0.966, "step": 2889 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9167719741077306e-05, "loss": 0.8303, "step": 2890 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9166922919362654e-05, "loss": 0.771, "step": 2891 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.91661257329721e-05, "loss": 0.8869, "step": 2892 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9165328181937365e-05, "loss": 0.9102, "step": 2893 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9164530266290172e-05, "loss": 0.8037, "step": 2894 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9163731986062268e-05, "loss": 0.7259, "step": 2895 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9162933341285407e-05, "loss": 0.8933, "step": 2896 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.916213433199136e-05, "loss": 0.8759, "step": 2897 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9161334958211912e-05, "loss": 0.7478, "step": 2898 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9160535219978867e-05, "loss": 0.9386, "step": 2899 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.915973511732404e-05, "loss": 0.7417, "step": 2900 }, { "epoch": 0.52, "grad_norm": 0.0, "learning_rate": 1.9158934650279252e-05, "loss": 0.9094, "step": 2901 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9158133818876358e-05, "loss": 0.8782, "step": 2902 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9157332623147212e-05, "loss": 0.7934, "step": 2903 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9156531063123687e-05, "loss": 0.8396, "step": 2904 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.915572913883767e-05, "loss": 0.867, "step": 2905 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9154926850321064e-05, "loss": 0.8349, "step": 2906 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9154124197605782e-05, "loss": 0.9739, "step": 2907 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.915332118072376e-05, "loss": 0.7449, "step": 2908 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.915251779970694e-05, "loss": 0.7846, "step": 2909 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9151714054587282e-05, "loss": 0.8435, "step": 2910 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9150909945396764e-05, "loss": 0.9194, "step": 2911 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9150105472167368e-05, "loss": 0.9278, "step": 2912 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9149300634931106e-05, "loss": 0.8198, "step": 2913 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9148495433719986e-05, "loss": 0.8596, "step": 2914 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9147689868566046e-05, "loss": 0.8606, "step": 2915 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9146883939501337e-05, "loss": 0.9282, "step": 2916 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.914607764655791e-05, "loss": 1.049, "step": 2917 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.914527098976785e-05, "loss": 0.8856, "step": 2918 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.914446396916324e-05, "loss": 0.9008, "step": 2919 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9143656584776192e-05, "loss": 0.8683, "step": 2920 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.914284883663882e-05, "loss": 0.8429, "step": 2921 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9142040724783254e-05, "loss": 0.9756, "step": 2922 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9141232249241654e-05, "loss": 1.025, "step": 2923 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.914042341004617e-05, "loss": 0.8818, "step": 2924 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9139614207228986e-05, "loss": 0.826, "step": 2925 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.913880464082229e-05, "loss": 0.7024, "step": 2926 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9137994710858292e-05, "loss": 0.7567, "step": 2927 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9137184417369213e-05, "loss": 0.9194, "step": 2928 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.913637376038728e-05, "loss": 0.8981, "step": 2929 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.913556273994475e-05, "loss": 0.7838, "step": 2930 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9134751356073882e-05, "loss": 0.8431, "step": 2931 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9133939608806957e-05, "loss": 0.7531, "step": 2932 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9133127498176267e-05, "loss": 0.8243, "step": 2933 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.913231502421412e-05, "loss": 0.8326, "step": 2934 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9131502186952836e-05, "loss": 0.9697, "step": 2935 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.913068898642475e-05, "loss": 0.868, "step": 2936 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.912987542266222e-05, "loss": 0.8622, "step": 2937 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9129061495697602e-05, "loss": 0.8004, "step": 2938 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.912824720556328e-05, "loss": 0.8219, "step": 2939 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9127432552291645e-05, "loss": 0.8287, "step": 2940 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9126617535915107e-05, "loss": 0.904, "step": 2941 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9125802156466092e-05, "loss": 0.8798, "step": 2942 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.912498641397703e-05, "loss": 0.8036, "step": 2943 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9124170308480377e-05, "loss": 0.8228, "step": 2944 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.91233538400086e-05, "loss": 0.7397, "step": 2945 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9122537008594176e-05, "loss": 0.9545, "step": 2946 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9121719814269603e-05, "loss": 0.8658, "step": 2947 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.912090225706739e-05, "loss": 0.8019, "step": 2948 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.912008433702006e-05, "loss": 0.7419, "step": 2949 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.911926605416015e-05, "loss": 0.8027, "step": 2950 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9118447408520214e-05, "loss": 0.7626, "step": 2951 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9117628400132823e-05, "loss": 0.8125, "step": 2952 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.911680902903055e-05, "loss": 0.8603, "step": 2953 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9115989295246003e-05, "loss": 0.8765, "step": 2954 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9115169198811777e-05, "loss": 0.823, "step": 2955 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.911434873976051e-05, "loss": 0.7648, "step": 2956 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 1.9113527918124836e-05, "loss": 0.9203, "step": 2957 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.911270673393741e-05, "loss": 1.0064, "step": 2958 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9111885187230895e-05, "loss": 0.9119, "step": 2959 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.911106327803798e-05, "loss": 0.9286, "step": 2960 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9110241006391363e-05, "loss": 0.8068, "step": 2961 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.910941837232375e-05, "loss": 0.8498, "step": 2962 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9108595375867867e-05, "loss": 0.7438, "step": 2963 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9107772017056455e-05, "loss": 0.9012, "step": 2964 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9106948295922273e-05, "loss": 0.9376, "step": 2965 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9106124212498085e-05, "loss": 0.836, "step": 2966 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9105299766816676e-05, "loss": 0.8266, "step": 2967 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9104474958910847e-05, "loss": 0.9894, "step": 2968 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9103649788813407e-05, "loss": 0.8615, "step": 2969 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9102824256557178e-05, "loss": 0.8988, "step": 2970 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.910199836217501e-05, "loss": 0.8372, "step": 2971 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9101172105699754e-05, "loss": 0.8622, "step": 2972 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9100345487164278e-05, "loss": 0.8804, "step": 2973 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.909951850660147e-05, "loss": 0.8585, "step": 2974 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.909869116404423e-05, "loss": 0.9623, "step": 2975 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9097863459525468e-05, "loss": 0.8667, "step": 2976 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.909703539307811e-05, "loss": 0.8273, "step": 2977 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.90962069647351e-05, "loss": 0.676, "step": 2978 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9095378174529394e-05, "loss": 0.9043, "step": 2979 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9094549022493965e-05, "loss": 0.7415, "step": 2980 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9093719508661796e-05, "loss": 0.8996, "step": 2981 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9092889633065884e-05, "loss": 0.7268, "step": 2982 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.909205939573925e-05, "loss": 0.8406, "step": 2983 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9091228796714912e-05, "loss": 0.8423, "step": 2984 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.909039783602592e-05, "loss": 0.7841, "step": 2985 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9089566513705335e-05, "loss": 0.8522, "step": 2986 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9088734829786217e-05, "loss": 0.9321, "step": 2987 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.908790278430166e-05, "loss": 0.7979, "step": 2988 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9087070377284758e-05, "loss": 0.9375, "step": 2989 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9086237608768635e-05, "loss": 0.9053, "step": 2990 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.908540447878641e-05, "loss": 0.819, "step": 2991 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.908457098737123e-05, "loss": 0.7879, "step": 2992 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.908373713455626e-05, "loss": 0.827, "step": 2993 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.908290292037466e-05, "loss": 0.8176, "step": 2994 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9082068344859623e-05, "loss": 0.8841, "step": 2995 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9081233408044346e-05, "loss": 0.8562, "step": 2996 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.908039810996205e-05, "loss": 0.8841, "step": 2997 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9079562450645962e-05, "loss": 0.8133, "step": 2998 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9078726430129323e-05, "loss": 0.8233, "step": 2999 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9077890048445397e-05, "loss": 0.6823, "step": 3000 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9077053305627453e-05, "loss": 0.8898, "step": 3001 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9076216201708776e-05, "loss": 0.6927, "step": 3002 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9075378736722674e-05, "loss": 0.83, "step": 3003 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9074540910702457e-05, "loss": 0.9461, "step": 3004 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.907370272368146e-05, "loss": 0.8036, "step": 3005 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9072864175693017e-05, "loss": 0.8767, "step": 3006 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.90720252667705e-05, "loss": 0.8135, "step": 3007 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9071185996947276e-05, "loss": 0.8419, "step": 3008 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.907034636625673e-05, "loss": 0.8519, "step": 3009 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.9069506374732272e-05, "loss": 0.7933, "step": 3010 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.906866602240731e-05, "loss": 0.8409, "step": 3011 }, { "epoch": 0.54, "grad_norm": 0.0, "learning_rate": 1.906782530931528e-05, "loss": 0.8835, "step": 3012 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.906698423548963e-05, "loss": 0.787, "step": 3013 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9066142800963806e-05, "loss": 0.8773, "step": 3014 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9065301005771294e-05, "loss": 0.7726, "step": 3015 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9064458849945574e-05, "loss": 0.8611, "step": 3016 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.906361633352016e-05, "loss": 0.9391, "step": 3017 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9062773456528555e-05, "loss": 0.8597, "step": 3018 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9061930219004302e-05, "loss": 0.7221, "step": 3019 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.906108662098094e-05, "loss": 0.8645, "step": 3020 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9060242662492026e-05, "loss": 0.7921, "step": 3021 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.905939834357114e-05, "loss": 1.1021, "step": 3022 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9058553664251866e-05, "loss": 0.9652, "step": 3023 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.905770862456781e-05, "loss": 0.9688, "step": 3024 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9056863224552594e-05, "loss": 0.7642, "step": 3025 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9056017464239834e-05, "loss": 0.9449, "step": 3026 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9055171343663192e-05, "loss": 0.7981, "step": 3027 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9054324862856323e-05, "loss": 0.8772, "step": 3028 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9053478021852897e-05, "loss": 0.7762, "step": 3029 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9052630820686604e-05, "loss": 0.8284, "step": 3030 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.905178325939115e-05, "loss": 0.8315, "step": 3031 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9050935338000254e-05, "loss": 0.9004, "step": 3032 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9050087056547642e-05, "loss": 0.7997, "step": 3033 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9049238415067064e-05, "loss": 0.8319, "step": 3034 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9048389413592282e-05, "loss": 0.6975, "step": 3035 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9047540052157066e-05, "loss": 0.8901, "step": 3036 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9046690330795207e-05, "loss": 0.8283, "step": 3037 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9045840249540513e-05, "loss": 0.7708, "step": 3038 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9044989808426793e-05, "loss": 0.8212, "step": 3039 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9044139007487886e-05, "loss": 0.7675, "step": 3040 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9043287846757633e-05, "loss": 0.9052, "step": 3041 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.90424363262699e-05, "loss": 0.8085, "step": 3042 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.904158444605856e-05, "loss": 0.8583, "step": 3043 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.90407322061575e-05, "loss": 0.9139, "step": 3044 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9039879606600626e-05, "loss": 0.7657, "step": 3045 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9039026647421854e-05, "loss": 0.8886, "step": 3046 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9038173328655116e-05, "loss": 0.9721, "step": 3047 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.903731965033436e-05, "loss": 0.9382, "step": 3048 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.903646561249355e-05, "loss": 0.79, "step": 3049 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9035611215166655e-05, "loss": 0.9001, "step": 3050 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9034756458387667e-05, "loss": 0.8362, "step": 3051 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.903390134219059e-05, "loss": 0.7905, "step": 3052 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9033045866609443e-05, "loss": 0.9412, "step": 3053 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9032190031678256e-05, "loss": 0.8074, "step": 3054 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9031333837431075e-05, "loss": 0.8399, "step": 3055 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9030477283901965e-05, "loss": 0.9083, "step": 3056 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9029620371125e-05, "loss": 0.7957, "step": 3057 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.902876309913426e-05, "loss": 0.875, "step": 3058 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9027905467963865e-05, "loss": 0.8429, "step": 3059 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.902704747764792e-05, "loss": 0.7655, "step": 3060 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9026189128220565e-05, "loss": 0.8434, "step": 3061 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9025330419715943e-05, "loss": 0.8516, "step": 3062 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9024471352168216e-05, "loss": 1.0158, "step": 3063 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.902361192561156e-05, "loss": 0.8848, "step": 3064 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.902275214008016e-05, "loss": 0.7017, "step": 3065 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.9021891995608225e-05, "loss": 0.745, "step": 3066 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 1.902103149222997e-05, "loss": 0.7614, "step": 3067 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9020170629979632e-05, "loss": 0.7887, "step": 3068 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.901930940889145e-05, "loss": 0.7102, "step": 3069 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.901844782899969e-05, "loss": 0.8739, "step": 3070 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9017585890338626e-05, "loss": 0.9309, "step": 3071 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9016723592942545e-05, "loss": 0.6571, "step": 3072 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9015860936845757e-05, "loss": 0.8473, "step": 3073 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9014997922082575e-05, "loss": 1.0384, "step": 3074 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9014134548687328e-05, "loss": 0.8679, "step": 3075 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9013270816694373e-05, "loss": 0.9326, "step": 3076 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.901240672613806e-05, "loss": 0.8078, "step": 3077 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9011542277052772e-05, "loss": 0.6802, "step": 3078 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9010677469472888e-05, "loss": 0.8317, "step": 3079 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.900981230343282e-05, "loss": 0.8933, "step": 3080 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9008946778966992e-05, "loss": 0.7986, "step": 3081 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.900808089610982e-05, "loss": 0.8223, "step": 3082 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.900721465489576e-05, "loss": 0.7566, "step": 3083 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.900634805535927e-05, "loss": 0.8146, "step": 3084 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9005481097534825e-05, "loss": 0.7676, "step": 3085 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9004613781456917e-05, "loss": 0.7734, "step": 3086 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9003746107160047e-05, "loss": 0.8534, "step": 3087 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.900287807467873e-05, "loss": 0.8185, "step": 3088 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9002009684047504e-05, "loss": 0.9879, "step": 3089 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.9001140935300912e-05, "loss": 1.0567, "step": 3090 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.900027182847351e-05, "loss": 0.8831, "step": 3091 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8999402363599875e-05, "loss": 0.7496, "step": 3092 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8998532540714604e-05, "loss": 0.9098, "step": 3093 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.899766235985229e-05, "loss": 0.8736, "step": 3094 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.899679182104755e-05, "loss": 0.7827, "step": 3095 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8995920924335024e-05, "loss": 0.8822, "step": 3096 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.899504966974935e-05, "loss": 0.9464, "step": 3097 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.899417805732519e-05, "loss": 0.8968, "step": 3098 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.899330608709722e-05, "loss": 0.9451, "step": 3099 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8992433759100128e-05, "loss": 0.8768, "step": 3100 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8991561073368618e-05, "loss": 0.8511, "step": 3101 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8990688029937402e-05, "loss": 0.9837, "step": 3102 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8989814628841215e-05, "loss": 0.8831, "step": 3103 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.89889408701148e-05, "loss": 0.7364, "step": 3104 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.898806675379292e-05, "loss": 0.7608, "step": 3105 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.898719227991035e-05, "loss": 0.8739, "step": 3106 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8986317448501873e-05, "loss": 0.814, "step": 3107 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.898544225960229e-05, "loss": 0.7645, "step": 3108 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8984566713246426e-05, "loss": 0.8941, "step": 3109 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8983690809469105e-05, "loss": 0.8609, "step": 3110 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8982814548305175e-05, "loss": 0.8688, "step": 3111 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.898193792978949e-05, "loss": 0.8016, "step": 3112 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.898106095395693e-05, "loss": 0.8451, "step": 3113 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.898018362084238e-05, "loss": 0.9332, "step": 3114 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.897930593048074e-05, "loss": 0.6987, "step": 3115 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8978427882906925e-05, "loss": 0.7456, "step": 3116 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8977549478155875e-05, "loss": 0.9165, "step": 3117 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8976670716262524e-05, "loss": 0.7923, "step": 3118 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8975791597261834e-05, "loss": 0.8945, "step": 3119 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8974912121188778e-05, "loss": 0.7984, "step": 3120 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.8974032288078344e-05, "loss": 0.9275, "step": 3121 }, { "epoch": 0.56, "grad_norm": 0.0, "learning_rate": 1.897315209796553e-05, "loss": 0.7537, "step": 3122 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8972271550885355e-05, "loss": 0.7716, "step": 3123 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8971390646872847e-05, "loss": 0.8136, "step": 3124 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.897050938596305e-05, "loss": 0.8862, "step": 3125 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8969627768191025e-05, "loss": 0.8949, "step": 3126 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8968745793591833e-05, "loss": 0.7525, "step": 3127 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8967863462200577e-05, "loss": 0.7615, "step": 3128 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.896698077405235e-05, "loss": 1.036, "step": 3129 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.896609772918226e-05, "loss": 0.8694, "step": 3130 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.896521432762545e-05, "loss": 0.9472, "step": 3131 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8964330569417055e-05, "loss": 0.8821, "step": 3132 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8963446454592227e-05, "loss": 0.7419, "step": 3133 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.896256198318615e-05, "loss": 0.8904, "step": 3134 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8961677155234e-05, "loss": 0.7279, "step": 3135 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.896079197077098e-05, "loss": 0.7365, "step": 3136 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8959906429832308e-05, "loss": 0.8823, "step": 3137 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.895902053245321e-05, "loss": 0.8353, "step": 3138 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8958134278668924e-05, "loss": 0.8711, "step": 3139 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.895724766851471e-05, "loss": 0.7184, "step": 3140 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.895636070202584e-05, "loss": 0.7746, "step": 3141 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.89554733792376e-05, "loss": 0.7036, "step": 3142 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8954585700185287e-05, "loss": 0.9293, "step": 3143 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.895369766490421e-05, "loss": 0.8267, "step": 3144 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8952809273429707e-05, "loss": 1.0428, "step": 3145 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.895192052579711e-05, "loss": 0.9573, "step": 3146 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8951031422041786e-05, "loss": 0.7829, "step": 3147 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.895014196219909e-05, "loss": 0.8258, "step": 3148 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.894925214630442e-05, "loss": 0.8636, "step": 3149 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8948361974393166e-05, "loss": 0.9737, "step": 3150 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8947471446500745e-05, "loss": 0.8981, "step": 3151 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.894658056266258e-05, "loss": 0.9303, "step": 3152 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8945689322914113e-05, "loss": 0.7842, "step": 3153 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8944797727290804e-05, "loss": 0.8518, "step": 3154 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8943905775828116e-05, "loss": 0.7753, "step": 3155 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8943013468561534e-05, "loss": 0.7977, "step": 3156 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8942120805526555e-05, "loss": 0.8809, "step": 3157 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.894122778675869e-05, "loss": 0.8224, "step": 3158 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8940334412293472e-05, "loss": 0.6964, "step": 3159 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.893944068216643e-05, "loss": 0.7804, "step": 3160 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8938546596413128e-05, "loss": 0.848, "step": 3161 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8937652155069126e-05, "loss": 0.7698, "step": 3162 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8936757358170013e-05, "loss": 0.7427, "step": 3163 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8935862205751383e-05, "loss": 0.7554, "step": 3164 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8934966697848844e-05, "loss": 0.9328, "step": 3165 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8934070834498025e-05, "loss": 0.8433, "step": 3166 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.893317461573456e-05, "loss": 0.8727, "step": 3167 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8932278041594113e-05, "loss": 0.858, "step": 3168 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8931381112112337e-05, "loss": 0.8127, "step": 3169 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8930483827324925e-05, "loss": 0.8141, "step": 3170 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8929586187267564e-05, "loss": 0.7694, "step": 3171 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8928688191975966e-05, "loss": 0.8452, "step": 3172 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.892778984148586e-05, "loss": 0.866, "step": 3173 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.892689113583298e-05, "loss": 0.7104, "step": 3174 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.8925992075053075e-05, "loss": 0.9014, "step": 3175 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.892509265918191e-05, "loss": 0.8482, "step": 3176 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.892419288825528e-05, "loss": 0.8556, "step": 3177 }, { "epoch": 0.57, "grad_norm": 0.0, "learning_rate": 1.892329276230896e-05, "loss": 0.8612, "step": 3178 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8922392281378773e-05, "loss": 0.8703, "step": 3179 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8921491445500533e-05, "loss": 0.8719, "step": 3180 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8920590254710084e-05, "loss": 0.8209, "step": 3181 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.891968870904327e-05, "loss": 0.8346, "step": 3182 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8918786808535955e-05, "loss": 0.875, "step": 3183 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8917884553224025e-05, "loss": 0.8646, "step": 3184 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8916981943143374e-05, "loss": 0.9386, "step": 3185 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8916078978329905e-05, "loss": 0.7432, "step": 3186 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8915175658819536e-05, "loss": 0.8949, "step": 3187 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.891427198464821e-05, "loss": 0.8982, "step": 3188 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.891336795585187e-05, "loss": 0.8534, "step": 3189 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8912463572466487e-05, "loss": 0.7168, "step": 3190 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8911558834528033e-05, "loss": 0.8436, "step": 3191 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8910653742072504e-05, "loss": 0.8242, "step": 3192 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8909748295135902e-05, "loss": 0.9286, "step": 3193 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.890884249375425e-05, "loss": 0.8739, "step": 3194 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8907936337963582e-05, "loss": 0.8904, "step": 3195 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.890702982779995e-05, "loss": 0.7962, "step": 3196 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8906122963299405e-05, "loss": 0.8322, "step": 3197 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.890521574449804e-05, "loss": 0.7957, "step": 3198 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.890430817143193e-05, "loss": 0.9518, "step": 3199 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8903400244137192e-05, "loss": 0.7907, "step": 3200 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.890249196264994e-05, "loss": 0.8526, "step": 3201 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8901583327006304e-05, "loss": 0.802, "step": 3202 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8900674337242437e-05, "loss": 0.8124, "step": 3203 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8899764993394494e-05, "loss": 0.885, "step": 3204 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8898855295498655e-05, "loss": 0.792, "step": 3205 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8897945243591108e-05, "loss": 0.847, "step": 3206 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8897034837708058e-05, "loss": 0.8482, "step": 3207 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8896124077885715e-05, "loss": 0.9307, "step": 3208 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8895212964160322e-05, "loss": 0.7758, "step": 3209 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8894301496568118e-05, "loss": 0.8439, "step": 3210 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8893389675145362e-05, "loss": 0.8995, "step": 3211 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.889247749992833e-05, "loss": 0.8472, "step": 3212 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.889156497095331e-05, "loss": 0.7426, "step": 3213 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.88906520882566e-05, "loss": 0.7892, "step": 3214 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8889738851874524e-05, "loss": 0.7693, "step": 3215 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8888825261843403e-05, "loss": 0.8513, "step": 3216 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.888791131819959e-05, "loss": 0.7616, "step": 3217 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.888699702097944e-05, "loss": 0.7195, "step": 3218 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.888608237021932e-05, "loss": 0.7855, "step": 3219 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.888516736595562e-05, "loss": 0.9964, "step": 3220 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8884252008224748e-05, "loss": 0.7769, "step": 3221 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8883336297063106e-05, "loss": 0.9089, "step": 3222 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8882420232507127e-05, "loss": 0.8868, "step": 3223 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8881503814593256e-05, "loss": 0.8728, "step": 3224 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8880587043357947e-05, "loss": 0.8083, "step": 3225 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.887966991883768e-05, "loss": 0.7884, "step": 3226 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8878752441068922e-05, "loss": 0.6924, "step": 3227 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8877834610088186e-05, "loss": 0.8341, "step": 3228 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.887691642593198e-05, "loss": 0.8892, "step": 3229 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8875997888636833e-05, "loss": 0.9865, "step": 3230 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8875078998239284e-05, "loss": 0.7635, "step": 3231 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8874159754775888e-05, "loss": 0.8169, "step": 3232 }, { "epoch": 0.58, "grad_norm": 0.0, "learning_rate": 1.8873240158283214e-05, "loss": 0.8577, "step": 3233 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8872320208797847e-05, "loss": 0.9034, "step": 3234 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.887139990635638e-05, "loss": 0.8274, "step": 3235 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8870479250995433e-05, "loss": 0.7765, "step": 3236 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.886955824275162e-05, "loss": 0.761, "step": 3237 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8868636881661588e-05, "loss": 0.8537, "step": 3238 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8867715167761987e-05, "loss": 0.9285, "step": 3239 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.886679310108949e-05, "loss": 0.7059, "step": 3240 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8865870681680765e-05, "loss": 0.7533, "step": 3241 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8864947909572525e-05, "loss": 0.7195, "step": 3242 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8864024784801467e-05, "loss": 0.8353, "step": 3243 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8863101307404318e-05, "loss": 0.9131, "step": 3244 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8862177477417812e-05, "loss": 0.823, "step": 3245 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8861253294878708e-05, "loss": 0.8459, "step": 3246 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.886032875982377e-05, "loss": 0.8126, "step": 3247 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.885940387228977e-05, "loss": 0.9322, "step": 3248 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8858478632313513e-05, "loss": 0.9388, "step": 3249 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8857553039931798e-05, "loss": 0.8687, "step": 3250 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8856627095181447e-05, "loss": 0.8275, "step": 3251 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8855700798099298e-05, "loss": 0.8234, "step": 3252 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8854774148722204e-05, "loss": 0.9728, "step": 3253 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8853847147087025e-05, "loss": 0.7314, "step": 3254 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8852919793230637e-05, "loss": 0.7907, "step": 3255 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8851992087189933e-05, "loss": 0.8153, "step": 3256 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8851064029001823e-05, "loss": 0.7319, "step": 3257 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.885013561870322e-05, "loss": 0.8232, "step": 3258 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.884920685633106e-05, "loss": 0.8247, "step": 3259 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8848277741922295e-05, "loss": 0.7352, "step": 3260 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8847348275513885e-05, "loss": 0.8489, "step": 3261 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.88464184571428e-05, "loss": 0.7729, "step": 3262 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8845488286846035e-05, "loss": 0.7959, "step": 3263 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8844557764660594e-05, "loss": 0.7773, "step": 3264 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8843626890623494e-05, "loss": 0.8106, "step": 3265 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8842695664771763e-05, "loss": 0.8955, "step": 3266 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8841764087142453e-05, "loss": 0.8581, "step": 3267 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.884083215777262e-05, "loss": 0.8259, "step": 3268 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8839899876699337e-05, "loss": 0.8402, "step": 3269 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8838967243959695e-05, "loss": 0.8768, "step": 3270 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8838034259590795e-05, "loss": 0.8704, "step": 3271 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.883710092362975e-05, "loss": 0.9023, "step": 3272 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.883616723611369e-05, "loss": 0.8458, "step": 3273 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8835233197079765e-05, "loss": 0.7608, "step": 3274 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8834298806565126e-05, "loss": 0.8612, "step": 3275 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8833364064606943e-05, "loss": 0.7555, "step": 3276 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.883242897124241e-05, "loss": 0.8393, "step": 3277 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.883149352650872e-05, "loss": 0.8791, "step": 3278 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.883055773044309e-05, "loss": 0.8796, "step": 3279 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8829621583082743e-05, "loss": 0.8987, "step": 3280 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8828685084464923e-05, "loss": 0.8014, "step": 3281 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8827748234626887e-05, "loss": 0.8263, "step": 3282 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8826811033605904e-05, "loss": 0.8795, "step": 3283 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8825873481439255e-05, "loss": 0.8167, "step": 3284 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.882493557816424e-05, "loss": 0.8571, "step": 3285 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.882399732381817e-05, "loss": 0.7589, "step": 3286 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8823058718438375e-05, "loss": 0.9302, "step": 3287 }, { "epoch": 0.59, "grad_norm": 0.0, "learning_rate": 1.8822119762062182e-05, "loss": 0.9235, "step": 3288 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.882118045472696e-05, "loss": 0.7243, "step": 3289 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.882024079647006e-05, "loss": 0.9022, "step": 3290 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8819300787328872e-05, "loss": 0.9428, "step": 3291 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8818360427340793e-05, "loss": 0.8603, "step": 3292 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8817419716543228e-05, "loss": 0.8098, "step": 3293 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8816478654973605e-05, "loss": 0.8124, "step": 3294 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8815537242669353e-05, "loss": 0.8472, "step": 3295 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.881459547966793e-05, "loss": 0.7331, "step": 3296 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.88136533660068e-05, "loss": 0.8514, "step": 3297 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.881271090172344e-05, "loss": 0.8948, "step": 3298 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8811768086855342e-05, "loss": 0.8343, "step": 3299 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8810824921440013e-05, "loss": 0.81, "step": 3300 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8809881405514976e-05, "loss": 0.7546, "step": 3301 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8808937539117765e-05, "loss": 0.852, "step": 3302 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.880799332228593e-05, "loss": 0.8623, "step": 3303 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.880704875505703e-05, "loss": 0.8678, "step": 3304 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8806103837468642e-05, "loss": 0.8393, "step": 3305 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8805158569558356e-05, "loss": 0.8177, "step": 3306 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8804212951363782e-05, "loss": 0.7532, "step": 3307 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.880326698292253e-05, "loss": 0.8242, "step": 3308 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8802320664272242e-05, "loss": 0.9647, "step": 3309 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8801373995450557e-05, "loss": 0.8887, "step": 3310 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8800426976495135e-05, "loss": 0.7668, "step": 3311 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8799479607443652e-05, "loss": 0.8489, "step": 3312 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8798531888333797e-05, "loss": 0.8793, "step": 3313 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.879758381920327e-05, "loss": 0.7532, "step": 3314 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.879663540008979e-05, "loss": 0.8033, "step": 3315 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.879568663103108e-05, "loss": 0.8467, "step": 3316 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.879473751206489e-05, "loss": 0.9316, "step": 3317 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8793788043228977e-05, "loss": 0.7824, "step": 3318 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.879283822456111e-05, "loss": 0.8672, "step": 3319 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8791888056099076e-05, "loss": 0.8311, "step": 3320 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8790937537880675e-05, "loss": 0.7617, "step": 3321 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8789986669943714e-05, "loss": 0.7057, "step": 3322 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.878903545232603e-05, "loss": 0.7023, "step": 3323 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.878808388506546e-05, "loss": 0.7324, "step": 3324 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8787131968199855e-05, "loss": 0.8534, "step": 3325 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8786179701767087e-05, "loss": 0.8611, "step": 3326 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.878522708580504e-05, "loss": 0.8853, "step": 3327 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8784274120351613e-05, "loss": 0.7273, "step": 3328 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.878332080544471e-05, "loss": 0.8061, "step": 3329 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8782367141122257e-05, "loss": 0.8869, "step": 3330 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8781413127422194e-05, "loss": 0.7529, "step": 3331 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8780458764382477e-05, "loss": 0.9485, "step": 3332 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8779504052041064e-05, "loss": 0.8923, "step": 3333 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.877854899043594e-05, "loss": 0.73, "step": 3334 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8777593579605105e-05, "loss": 0.5984, "step": 3335 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8776637819586556e-05, "loss": 0.8247, "step": 3336 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8775681710418322e-05, "loss": 0.9455, "step": 3337 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8774725252138432e-05, "loss": 0.7913, "step": 3338 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.877376844478494e-05, "loss": 0.8684, "step": 3339 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.8772811288395912e-05, "loss": 0.916, "step": 3340 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.877185378300942e-05, "loss": 0.9042, "step": 3341 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.877089592866356e-05, "loss": 0.8195, "step": 3342 }, { "epoch": 0.6, "grad_norm": 0.0, "learning_rate": 1.876993772539643e-05, "loss": 0.9578, "step": 3343 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8768979173246154e-05, "loss": 0.8988, "step": 3344 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.876802027225087e-05, "loss": 0.9336, "step": 3345 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8767061022448713e-05, "loss": 0.8382, "step": 3346 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8766101423877858e-05, "loss": 0.9322, "step": 3347 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.876514147657646e-05, "loss": 0.9418, "step": 3348 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8764181180582725e-05, "loss": 0.8179, "step": 3349 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.876322053593485e-05, "loss": 0.7819, "step": 3350 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8762259542671047e-05, "loss": 0.9021, "step": 3351 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.876129820082955e-05, "loss": 0.8737, "step": 3352 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.87603365104486e-05, "loss": 0.789, "step": 3353 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8759374471566458e-05, "loss": 0.6921, "step": 3354 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8758412084221393e-05, "loss": 0.9843, "step": 3355 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.875744934845169e-05, "loss": 0.7104, "step": 3356 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8756486264295653e-05, "loss": 0.9484, "step": 3357 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.875552283179159e-05, "loss": 0.804, "step": 3358 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.875455905097783e-05, "loss": 0.8221, "step": 3359 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8753594921892713e-05, "loss": 0.8688, "step": 3360 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8752630444574596e-05, "loss": 0.9008, "step": 3361 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8751665619061846e-05, "loss": 0.863, "step": 3362 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.875070044539284e-05, "loss": 0.9448, "step": 3363 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8749734923605983e-05, "loss": 0.793, "step": 3364 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8748769053739684e-05, "loss": 0.8441, "step": 3365 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8747802835832362e-05, "loss": 0.9347, "step": 3366 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8746836269922458e-05, "loss": 0.8238, "step": 3367 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.874586935604842e-05, "loss": 0.9367, "step": 3368 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8744902094248722e-05, "loss": 0.8585, "step": 3369 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8743934484561835e-05, "loss": 0.8924, "step": 3370 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8742966527026255e-05, "loss": 0.8971, "step": 3371 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8741998221680488e-05, "loss": 0.7599, "step": 3372 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8741029568563054e-05, "loss": 0.9741, "step": 3373 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8740060567712497e-05, "loss": 0.9042, "step": 3374 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8739091219167353e-05, "loss": 0.9684, "step": 3375 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8738121522966192e-05, "loss": 0.8781, "step": 3376 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8737151479147582e-05, "loss": 0.811, "step": 3377 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8736181087750123e-05, "loss": 1.0382, "step": 3378 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8735210348812413e-05, "loss": 0.818, "step": 3379 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8734239262373074e-05, "loss": 0.9561, "step": 3380 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.873326782847073e-05, "loss": 0.8973, "step": 3381 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8732296047144034e-05, "loss": 0.9559, "step": 3382 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8731323918431642e-05, "loss": 0.8057, "step": 3383 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8730351442372225e-05, "loss": 0.993, "step": 3384 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8729378619004472e-05, "loss": 0.7736, "step": 3385 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8728405448367084e-05, "loss": 0.7832, "step": 3386 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8727431930498775e-05, "loss": 0.8626, "step": 3387 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8726458065438272e-05, "loss": 0.8038, "step": 3388 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.872548385322432e-05, "loss": 0.781, "step": 3389 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.872450929389567e-05, "loss": 0.9153, "step": 3390 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8723534387491096e-05, "loss": 0.773, "step": 3391 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8722559134049378e-05, "loss": 0.9383, "step": 3392 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8721583533609317e-05, "loss": 0.7748, "step": 3393 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.872060758620972e-05, "loss": 0.9846, "step": 3394 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8719631291889418e-05, "loss": 0.7433, "step": 3395 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8718654650687242e-05, "loss": 0.8014, "step": 3396 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.871767766264205e-05, "loss": 0.9404, "step": 3397 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8716700327792704e-05, "loss": 0.807, "step": 3398 }, { "epoch": 0.61, "grad_norm": 0.0, "learning_rate": 1.8715722646178092e-05, "loss": 0.8367, "step": 3399 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8714744617837097e-05, "loss": 0.8487, "step": 3400 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8713766242808632e-05, "loss": 0.7813, "step": 3401 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8712787521131623e-05, "loss": 0.7594, "step": 3402 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8711808452844994e-05, "loss": 0.7872, "step": 3403 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8710829037987706e-05, "loss": 0.9578, "step": 3404 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8709849276598713e-05, "loss": 0.7889, "step": 3405 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8708869168716998e-05, "loss": 0.7716, "step": 3406 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8707888714381545e-05, "loss": 0.8459, "step": 3407 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.870690791363136e-05, "loss": 0.9308, "step": 3408 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.870592676650547e-05, "loss": 0.9009, "step": 3409 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8704945273042894e-05, "loss": 0.8496, "step": 3410 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.870396343328268e-05, "loss": 0.7889, "step": 3411 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8702981247263895e-05, "loss": 0.8091, "step": 3412 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8701998715025603e-05, "loss": 0.9107, "step": 3413 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.87010158366069e-05, "loss": 0.7906, "step": 3414 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.870003261204688e-05, "loss": 0.8449, "step": 3415 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8699049041384657e-05, "loss": 0.7747, "step": 3416 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.869806512465936e-05, "loss": 0.7872, "step": 3417 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8697080861910132e-05, "loss": 0.7961, "step": 3418 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8696096253176126e-05, "loss": 0.7226, "step": 3419 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.869511129849652e-05, "loss": 0.746, "step": 3420 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8694125997910487e-05, "loss": 0.7721, "step": 3421 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8693140351457228e-05, "loss": 0.8485, "step": 3422 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8692154359175957e-05, "loss": 0.8597, "step": 3423 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8691168021105893e-05, "loss": 0.8278, "step": 3424 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8690181337286274e-05, "loss": 0.8157, "step": 3425 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8689194307756354e-05, "loss": 0.7685, "step": 3426 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8688206932555402e-05, "loss": 0.8853, "step": 3427 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8687219211722693e-05, "loss": 0.8161, "step": 3428 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8686231145297523e-05, "loss": 0.7903, "step": 3429 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8685242733319198e-05, "loss": 0.842, "step": 3430 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8684253975827036e-05, "loss": 0.8384, "step": 3431 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8683264872860377e-05, "loss": 0.9102, "step": 3432 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8682275424458565e-05, "loss": 0.8291, "step": 3433 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8681285630660962e-05, "loss": 0.8114, "step": 3434 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8680295491506942e-05, "loss": 0.7762, "step": 3435 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8679305007035902e-05, "loss": 0.843, "step": 3436 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.867831417728724e-05, "loss": 0.8644, "step": 3437 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8677323002300373e-05, "loss": 0.8276, "step": 3438 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.867633148211473e-05, "loss": 0.9524, "step": 3439 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.867533961676976e-05, "loss": 0.8446, "step": 3440 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8674347406304914e-05, "loss": 0.8569, "step": 3441 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.867335485075967e-05, "loss": 0.8478, "step": 3442 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8672361950173514e-05, "loss": 0.9302, "step": 3443 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.867136870458594e-05, "loss": 0.8421, "step": 3444 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8670375114036464e-05, "loss": 0.8529, "step": 3445 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8669381178564617e-05, "loss": 0.6839, "step": 3446 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8668386898209933e-05, "loss": 0.9982, "step": 3447 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8667392273011965e-05, "loss": 0.9695, "step": 3448 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.866639730301029e-05, "loss": 0.857, "step": 3449 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8665401988244482e-05, "loss": 0.7373, "step": 3450 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8664406328754138e-05, "loss": 0.8402, "step": 3451 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8663410324578867e-05, "loss": 0.8142, "step": 3452 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8662413975758287e-05, "loss": 0.7649, "step": 3453 }, { "epoch": 0.62, "grad_norm": 0.0, "learning_rate": 1.8661417282332047e-05, "loss": 0.8244, "step": 3454 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8660420244339786e-05, "loss": 0.9701, "step": 3455 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8659422861821173e-05, "loss": 0.8303, "step": 3456 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8658425134815884e-05, "loss": 0.7802, "step": 3457 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.865742706336361e-05, "loss": 0.8673, "step": 3458 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.865642864750406e-05, "loss": 0.7844, "step": 3459 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.865542988727694e-05, "loss": 0.792, "step": 3460 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8654430782722e-05, "loss": 0.9362, "step": 3461 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8653431333878976e-05, "loss": 0.8282, "step": 3462 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.865243154078763e-05, "loss": 0.8987, "step": 3463 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.865143140348773e-05, "loss": 0.8063, "step": 3464 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8650430922019072e-05, "loss": 0.7864, "step": 3465 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8649430096421454e-05, "loss": 0.6981, "step": 3466 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8648428926734684e-05, "loss": 0.8124, "step": 3467 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.86474274129986e-05, "loss": 0.8894, "step": 3468 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8646425555253038e-05, "loss": 0.807, "step": 3469 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8645423353537856e-05, "loss": 0.784, "step": 3470 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8644420807892922e-05, "loss": 0.8167, "step": 3471 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.864341791835812e-05, "loss": 0.7861, "step": 3472 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8642414684973347e-05, "loss": 0.9062, "step": 3473 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8641411107778513e-05, "loss": 0.9233, "step": 3474 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.864040718681354e-05, "loss": 0.7689, "step": 3475 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.863940292211837e-05, "loss": 0.8356, "step": 3476 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.863839831373295e-05, "loss": 0.7607, "step": 3477 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8637393361697244e-05, "loss": 0.8349, "step": 3478 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8636388066051237e-05, "loss": 0.8572, "step": 3479 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8635382426834914e-05, "loss": 0.8152, "step": 3480 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8634376444088288e-05, "loss": 1.0299, "step": 3481 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8633370117851375e-05, "loss": 0.6968, "step": 3482 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8632363448164208e-05, "loss": 0.6607, "step": 3483 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8631356435066837e-05, "loss": 0.6926, "step": 3484 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.863034907859932e-05, "loss": 0.9314, "step": 3485 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.862934137880173e-05, "loss": 0.7446, "step": 3486 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8628333335714156e-05, "loss": 0.939, "step": 3487 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8627324949376705e-05, "loss": 0.7646, "step": 3488 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8626316219829487e-05, "loss": 0.7713, "step": 3489 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8625307147112633e-05, "loss": 0.7858, "step": 3490 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.862429773126628e-05, "loss": 0.9021, "step": 3491 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8623287972330594e-05, "loss": 1.0224, "step": 3492 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8622277870345737e-05, "loss": 0.9552, "step": 3493 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8621267425351896e-05, "loss": 0.9373, "step": 3494 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.862025663738927e-05, "loss": 0.7295, "step": 3495 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8619245506498065e-05, "loss": 0.8047, "step": 3496 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8618234032718508e-05, "loss": 0.8382, "step": 3497 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.861722221609084e-05, "loss": 0.922, "step": 3498 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8616210056655307e-05, "loss": 0.8745, "step": 3499 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.861519755445218e-05, "loss": 0.7999, "step": 3500 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8614184709521734e-05, "loss": 0.7832, "step": 3501 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8613171521904264e-05, "loss": 0.7819, "step": 3502 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8612157991640076e-05, "loss": 0.7932, "step": 3503 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8611144118769492e-05, "loss": 0.816, "step": 3504 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8610129903332838e-05, "loss": 0.8732, "step": 3505 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.860911534537047e-05, "loss": 0.9227, "step": 3506 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8608100444922743e-05, "loss": 0.7815, "step": 3507 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.860708520203004e-05, "loss": 0.9959, "step": 3508 }, { "epoch": 0.63, "grad_norm": 0.0, "learning_rate": 1.8606069616732732e-05, "loss": 0.7761, "step": 3509 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.860505368907124e-05, "loss": 0.8385, "step": 3510 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.860403741908597e-05, "loss": 0.9494, "step": 3511 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.860302080681735e-05, "loss": 0.7465, "step": 3512 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8602003852305826e-05, "loss": 0.9142, "step": 3513 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.860098655559185e-05, "loss": 0.8978, "step": 3514 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.85999689167159e-05, "loss": 0.7041, "step": 3515 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8598950935718448e-05, "loss": 0.7482, "step": 3516 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.859793261264e-05, "loss": 0.857, "step": 3517 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8596913947521064e-05, "loss": 0.8502, "step": 3518 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.859589494040216e-05, "loss": 0.7781, "step": 3519 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8594875591323833e-05, "loss": 0.899, "step": 3520 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8593855900326633e-05, "loss": 0.9241, "step": 3521 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.859283586745112e-05, "loss": 0.9765, "step": 3522 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8591815492737874e-05, "loss": 0.8673, "step": 3523 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8590794776227493e-05, "loss": 0.8652, "step": 3524 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8589773717960578e-05, "loss": 0.8594, "step": 3525 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.858875231797775e-05, "loss": 0.8309, "step": 3526 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.858773057631964e-05, "loss": 0.7824, "step": 3527 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8586708493026895e-05, "loss": 0.8087, "step": 3528 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8585686068140178e-05, "loss": 0.7427, "step": 3529 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8584663301700163e-05, "loss": 0.9093, "step": 3530 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8583640193747534e-05, "loss": 0.8737, "step": 3531 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8582616744322994e-05, "loss": 0.9976, "step": 3532 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8581592953467255e-05, "loss": 0.9055, "step": 3533 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8580568821221047e-05, "loss": 0.9664, "step": 3534 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8579544347625114e-05, "loss": 0.903, "step": 3535 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8578519532720207e-05, "loss": 0.8707, "step": 3536 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.85774943765471e-05, "loss": 0.6513, "step": 3537 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.857646887914657e-05, "loss": 0.9378, "step": 3538 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8575443040559416e-05, "loss": 0.8324, "step": 3539 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8574416860826452e-05, "loss": 0.7994, "step": 3540 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.857339033998849e-05, "loss": 0.7993, "step": 3541 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.857236347808638e-05, "loss": 0.75, "step": 3542 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.857133627516096e-05, "loss": 0.7995, "step": 3543 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.85703087312531e-05, "loss": 0.842, "step": 3544 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.856928084640368e-05, "loss": 0.9096, "step": 3545 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8568252620653585e-05, "loss": 0.7907, "step": 3546 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8567224054043724e-05, "loss": 0.7864, "step": 3547 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8566195146615015e-05, "loss": 0.9012, "step": 3548 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8565165898408383e-05, "loss": 0.7019, "step": 3549 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.856413630946478e-05, "loss": 1.0449, "step": 3550 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8563106379825167e-05, "loss": 0.8761, "step": 3551 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8562076109530512e-05, "loss": 0.7994, "step": 3552 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.85610454986218e-05, "loss": 0.9208, "step": 3553 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8560014547140035e-05, "loss": 0.8438, "step": 3554 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8558983255126225e-05, "loss": 0.6942, "step": 3555 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.85579516226214e-05, "loss": 0.7862, "step": 3556 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8556919649666597e-05, "loss": 0.7421, "step": 3557 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8555887336302872e-05, "loss": 0.8485, "step": 3558 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8554854682571294e-05, "loss": 0.6505, "step": 3559 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.855382168851294e-05, "loss": 0.8145, "step": 3560 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8552788354168907e-05, "loss": 0.838, "step": 3561 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.85517546795803e-05, "loss": 0.7539, "step": 3562 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8550720664788242e-05, "loss": 0.8091, "step": 3563 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8549686309833866e-05, "loss": 0.9185, "step": 3564 }, { "epoch": 0.64, "grad_norm": 0.0, "learning_rate": 1.8548651614758322e-05, "loss": 0.8755, "step": 3565 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8547616579602778e-05, "loss": 0.7851, "step": 3566 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8546581204408397e-05, "loss": 0.8188, "step": 3567 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8545545489216378e-05, "loss": 0.7481, "step": 3568 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.854450943406792e-05, "loss": 0.8662, "step": 3569 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.854347303900424e-05, "loss": 0.7745, "step": 3570 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.854243630406656e-05, "loss": 0.8456, "step": 3571 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8541399229296138e-05, "loss": 0.8057, "step": 3572 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.854036181473422e-05, "loss": 0.975, "step": 3573 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.853932406042208e-05, "loss": 0.9164, "step": 3574 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8538285966400995e-05, "loss": 0.8883, "step": 3575 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.853724753271227e-05, "loss": 0.6793, "step": 3576 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8536208759397213e-05, "loss": 0.8278, "step": 3577 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.853516964649715e-05, "loss": 0.8587, "step": 3578 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8534130194053417e-05, "loss": 0.8616, "step": 3579 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.853309040210736e-05, "loss": 0.8635, "step": 3580 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8532050270700354e-05, "loss": 0.8226, "step": 3581 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8531009799873772e-05, "loss": 0.8425, "step": 3582 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8529968989669002e-05, "loss": 0.8808, "step": 3583 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.852892784012746e-05, "loss": 0.8915, "step": 3584 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.852788635129055e-05, "loss": 0.8505, "step": 3585 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8526844523199713e-05, "loss": 0.7791, "step": 3586 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8525802355896397e-05, "loss": 0.9035, "step": 3587 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8524759849422055e-05, "loss": 0.9089, "step": 3588 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8523717003818164e-05, "loss": 0.8229, "step": 3589 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8522673819126208e-05, "loss": 0.9042, "step": 3590 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8521630295387688e-05, "loss": 0.7314, "step": 3591 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8520586432644116e-05, "loss": 0.8596, "step": 3592 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.851954223093702e-05, "loss": 0.8519, "step": 3593 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.851849769030794e-05, "loss": 0.931, "step": 3594 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8517452810798425e-05, "loss": 0.8937, "step": 3595 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.851640759245005e-05, "loss": 0.7086, "step": 3596 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.851536203530439e-05, "loss": 0.7788, "step": 3597 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.851431613940304e-05, "loss": 0.8644, "step": 3598 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8513269904787607e-05, "loss": 0.8668, "step": 3599 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.851222333149972e-05, "loss": 0.8542, "step": 3600 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8511176419580997e-05, "loss": 0.7768, "step": 3601 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8510129169073103e-05, "loss": 0.9157, "step": 3602 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8509081580017686e-05, "loss": 0.9066, "step": 3603 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8508033652456428e-05, "loss": 0.825, "step": 3604 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.850698538643102e-05, "loss": 0.8718, "step": 3605 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.850593678198316e-05, "loss": 0.7678, "step": 3606 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.850488783915456e-05, "loss": 0.9578, "step": 3607 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8503838557986953e-05, "loss": 0.7418, "step": 3608 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.850278893852208e-05, "loss": 0.9379, "step": 3609 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.85017389808017e-05, "loss": 0.8234, "step": 3610 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8500688684867576e-05, "loss": 0.76, "step": 3611 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8499638050761494e-05, "loss": 0.8309, "step": 3612 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.849858707852525e-05, "loss": 0.7722, "step": 3613 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8497535768200654e-05, "loss": 0.8597, "step": 3614 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8496484119829525e-05, "loss": 0.7895, "step": 3615 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8495432133453707e-05, "loss": 0.8024, "step": 3616 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8494379809115043e-05, "loss": 0.6397, "step": 3617 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8493327146855396e-05, "loss": 0.7969, "step": 3618 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.8492274146716648e-05, "loss": 0.7894, "step": 3619 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 1.849122080874069e-05, "loss": 0.876, "step": 3620 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8490167132969415e-05, "loss": 0.8076, "step": 3621 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8489113119444752e-05, "loss": 0.8348, "step": 3622 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8488058768208622e-05, "loss": 0.8652, "step": 3623 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8487004079302977e-05, "loss": 0.7844, "step": 3624 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.848594905276977e-05, "loss": 0.7453, "step": 3625 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.848489368865097e-05, "loss": 0.8468, "step": 3626 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8483837986988566e-05, "loss": 0.9193, "step": 3627 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8482781947824552e-05, "loss": 0.8345, "step": 3628 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8481725571200942e-05, "loss": 0.8936, "step": 3629 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.848066885715976e-05, "loss": 0.9453, "step": 3630 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8479611805743037e-05, "loss": 0.8502, "step": 3631 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8478554416992836e-05, "loss": 0.8887, "step": 3632 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8477496690951212e-05, "loss": 0.7602, "step": 3633 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.847643862766025e-05, "loss": 0.7679, "step": 3634 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8475380227162037e-05, "loss": 0.9459, "step": 3635 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8474321489498678e-05, "loss": 0.7682, "step": 3636 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8473262414712295e-05, "loss": 0.9104, "step": 3637 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8472203002845015e-05, "loss": 0.7151, "step": 3638 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.847114325393899e-05, "loss": 0.8006, "step": 3639 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.847008316803637e-05, "loss": 0.7205, "step": 3640 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8469022745179335e-05, "loss": 0.8439, "step": 3641 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8467961985410064e-05, "loss": 0.7389, "step": 3642 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.846690088877076e-05, "loss": 0.8773, "step": 3643 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8465839455303633e-05, "loss": 0.7718, "step": 3644 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8464777685050914e-05, "loss": 0.9426, "step": 3645 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.846371557805483e-05, "loss": 0.8141, "step": 3646 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.846265313435765e-05, "loss": 0.8228, "step": 3647 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8461590354001627e-05, "loss": 0.8123, "step": 3648 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8460527237029042e-05, "loss": 0.7805, "step": 3649 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8459463783482193e-05, "loss": 0.7882, "step": 3650 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.845839999340338e-05, "loss": 0.7048, "step": 3651 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8457335866834927e-05, "loss": 0.7705, "step": 3652 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8456271403819165e-05, "loss": 0.9668, "step": 3653 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.845520660439844e-05, "loss": 0.8944, "step": 3654 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8454141468615112e-05, "loss": 0.8965, "step": 3655 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8453075996511555e-05, "loss": 0.845, "step": 3656 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.845201018813015e-05, "loss": 0.7955, "step": 3657 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8450944043513302e-05, "loss": 0.7639, "step": 3658 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8449877562703426e-05, "loss": 0.8518, "step": 3659 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.844881074574294e-05, "loss": 0.9266, "step": 3660 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8447743592674293e-05, "loss": 0.9071, "step": 3661 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8446676103539934e-05, "loss": 0.9298, "step": 3662 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.844560827838233e-05, "loss": 0.8218, "step": 3663 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8444540117243958e-05, "loss": 0.9439, "step": 3664 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8443471620167313e-05, "loss": 0.8946, "step": 3665 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8442402787194905e-05, "loss": 0.827, "step": 3666 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8441333618369247e-05, "loss": 0.7365, "step": 3667 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8440264113732884e-05, "loss": 0.8101, "step": 3668 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8439194273328352e-05, "loss": 0.7637, "step": 3669 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8438124097198214e-05, "loss": 0.8796, "step": 3670 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8437053585385044e-05, "loss": 0.9479, "step": 3671 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.843598273793143e-05, "loss": 0.8122, "step": 3672 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.843491155487997e-05, "loss": 0.8586, "step": 3673 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.843384003627328e-05, "loss": 0.839, "step": 3674 }, { "epoch": 0.66, "grad_norm": 0.0, "learning_rate": 1.8432768182153985e-05, "loss": 0.7206, "step": 3675 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8431695992564725e-05, "loss": 0.6882, "step": 3676 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8430623467548156e-05, "loss": 0.9586, "step": 3677 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8429550607146942e-05, "loss": 0.93, "step": 3678 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8428477411403762e-05, "loss": 0.8586, "step": 3679 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8427403880361313e-05, "loss": 0.7981, "step": 3680 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.84263300140623e-05, "loss": 0.8514, "step": 3681 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8425255812549448e-05, "loss": 0.7811, "step": 3682 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8424181275865482e-05, "loss": 0.767, "step": 3683 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8423106404053154e-05, "loss": 0.7689, "step": 3684 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8422031197155225e-05, "loss": 0.77, "step": 3685 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8420955655214468e-05, "loss": 0.9492, "step": 3686 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8419879778273668e-05, "loss": 0.8502, "step": 3687 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8418803566375626e-05, "loss": 0.9824, "step": 3688 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8417727019563156e-05, "loss": 0.8821, "step": 3689 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8416650137879087e-05, "loss": 0.8734, "step": 3690 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8415572921366255e-05, "loss": 0.8235, "step": 3691 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8414495370067517e-05, "loss": 0.7835, "step": 3692 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8413417484025738e-05, "loss": 0.8904, "step": 3693 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8412339263283793e-05, "loss": 0.8458, "step": 3694 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.841126070788459e-05, "loss": 0.8126, "step": 3695 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.841018181787102e-05, "loss": 0.9012, "step": 3696 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8409102593286012e-05, "loss": 0.7721, "step": 3697 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8408023034172498e-05, "loss": 0.8227, "step": 3698 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.840694314057342e-05, "loss": 0.7607, "step": 3699 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8405862912531747e-05, "loss": 0.7738, "step": 3700 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8404782350090448e-05, "loss": 0.8147, "step": 3701 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8403701453292505e-05, "loss": 0.8936, "step": 3702 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8402620222180923e-05, "loss": 0.8439, "step": 3703 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8401538656798713e-05, "loss": 0.6857, "step": 3704 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.840045675718891e-05, "loss": 0.9017, "step": 3705 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8399374523394534e-05, "loss": 0.8494, "step": 3706 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.839829195545866e-05, "loss": 0.78, "step": 3707 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8397209053424342e-05, "loss": 0.9344, "step": 3708 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.839612581733466e-05, "loss": 0.7694, "step": 3709 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8395042247232716e-05, "loss": 0.8538, "step": 3710 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8393958343161604e-05, "loss": 0.7902, "step": 3711 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.839287410516445e-05, "loss": 0.6826, "step": 3712 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.839178953328439e-05, "loss": 0.8473, "step": 3713 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8390704627564563e-05, "loss": 0.8874, "step": 3714 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8389619388048134e-05, "loss": 0.782, "step": 3715 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.838853381477827e-05, "loss": 1.0413, "step": 3716 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8387447907798167e-05, "loss": 0.6941, "step": 3717 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.838636166715101e-05, "loss": 0.819, "step": 3718 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8385275092880026e-05, "loss": 0.7759, "step": 3719 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.838418818502843e-05, "loss": 0.7389, "step": 3720 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8383100943639467e-05, "loss": 0.8323, "step": 3721 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.838201336875639e-05, "loss": 0.8225, "step": 3722 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8380925460422455e-05, "loss": 0.8313, "step": 3723 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.837983721868095e-05, "loss": 0.7998, "step": 3724 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8378748643575168e-05, "loss": 0.9456, "step": 3725 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.837765973514841e-05, "loss": 0.7728, "step": 3726 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8376570493443994e-05, "loss": 0.7805, "step": 3727 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.837548091850526e-05, "loss": 0.8149, "step": 3728 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.837439101037554e-05, "loss": 0.8081, "step": 3729 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 1.8373300769098204e-05, "loss": 0.8445, "step": 3730 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.837221019471662e-05, "loss": 0.8145, "step": 3731 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8371119287274165e-05, "loss": 0.8896, "step": 3732 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.837002804681425e-05, "loss": 0.8272, "step": 3733 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8368936473380278e-05, "loss": 0.8569, "step": 3734 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.836784456701568e-05, "loss": 0.8521, "step": 3735 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8366752327763885e-05, "loss": 0.7645, "step": 3736 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8365659755668356e-05, "loss": 0.89, "step": 3737 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8364566850772544e-05, "loss": 0.792, "step": 3738 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8363473613119938e-05, "loss": 0.818, "step": 3739 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8362380042754023e-05, "loss": 0.8373, "step": 3740 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8361286139718303e-05, "loss": 0.9259, "step": 3741 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.83601919040563e-05, "loss": 0.9812, "step": 3742 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.835909733581154e-05, "loss": 0.7439, "step": 3743 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8358002435027565e-05, "loss": 0.8835, "step": 3744 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8356907201747937e-05, "loss": 0.8629, "step": 3745 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.835581163601622e-05, "loss": 0.8138, "step": 3746 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.835471573787601e-05, "loss": 0.9141, "step": 3747 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.835361950737089e-05, "loss": 0.891, "step": 3748 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.835252294454448e-05, "loss": 0.7226, "step": 3749 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.835142604944039e-05, "loss": 0.9709, "step": 3750 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8350328822102277e-05, "loss": 0.8351, "step": 3751 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.834923126257377e-05, "loss": 0.8118, "step": 3752 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8348133370898545e-05, "loss": 0.7578, "step": 3753 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.834703514712027e-05, "loss": 0.9671, "step": 3754 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8345936591282637e-05, "loss": 0.9968, "step": 3755 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8344837703429352e-05, "loss": 0.777, "step": 3756 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8343738483604126e-05, "loss": 0.918, "step": 3757 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.834263893185069e-05, "loss": 0.9259, "step": 3758 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8341539048212787e-05, "loss": 0.8001, "step": 3759 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.834043883273417e-05, "loss": 0.8034, "step": 3760 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8339338285458607e-05, "loss": 0.92, "step": 3761 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.833823740642988e-05, "loss": 0.8914, "step": 3762 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8337136195691786e-05, "loss": 0.8523, "step": 3763 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.833603465328813e-05, "loss": 0.745, "step": 3764 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.833493277926274e-05, "loss": 0.7244, "step": 3765 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8333830573659443e-05, "loss": 0.8279, "step": 3766 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8332728036522087e-05, "loss": 0.7685, "step": 3767 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8331625167894538e-05, "loss": 0.7605, "step": 3768 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8330521967820666e-05, "loss": 0.8625, "step": 3769 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8329418436344358e-05, "loss": 0.7891, "step": 3770 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8328314573509515e-05, "loss": 0.7801, "step": 3771 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8327210379360053e-05, "loss": 0.8069, "step": 3772 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.83261058539399e-05, "loss": 0.9182, "step": 3773 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8325000997292985e-05, "loss": 0.8751, "step": 3774 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8323895809463274e-05, "loss": 0.8181, "step": 3775 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8322790290494726e-05, "loss": 0.7836, "step": 3776 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8321684440431326e-05, "loss": 0.8871, "step": 3777 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8320578259317055e-05, "loss": 0.9, "step": 3778 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8319471747195935e-05, "loss": 0.8223, "step": 3779 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8318364904111972e-05, "loss": 0.9166, "step": 3780 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8317257730109208e-05, "loss": 0.7535, "step": 3781 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.831615022523168e-05, "loss": 0.8423, "step": 3782 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.831504238952345e-05, "loss": 0.8132, "step": 3783 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.831393422302859e-05, "loss": 0.7936, "step": 3784 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8312825725791182e-05, "loss": 0.8991, "step": 3785 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 1.8311716897855327e-05, "loss": 0.8174, "step": 3786 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8310607739265135e-05, "loss": 0.8165, "step": 3787 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.830949825006473e-05, "loss": 0.792, "step": 3788 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8308388430298253e-05, "loss": 0.8372, "step": 3789 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8307278280009848e-05, "loss": 0.8668, "step": 3790 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.830616779924368e-05, "loss": 0.8085, "step": 3791 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.830505698804393e-05, "loss": 0.9003, "step": 3792 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.830394584645479e-05, "loss": 0.9129, "step": 3793 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8302834374520452e-05, "loss": 0.7985, "step": 3794 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8301722572285144e-05, "loss": 0.7671, "step": 3795 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8300610439793085e-05, "loss": 0.88, "step": 3796 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8299497977088526e-05, "loss": 0.8199, "step": 3797 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.829838518421572e-05, "loss": 0.7688, "step": 3798 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8297272061218935e-05, "loss": 0.865, "step": 3799 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8296158608142454e-05, "loss": 0.96, "step": 3800 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8295044825030572e-05, "loss": 1.0063, "step": 3801 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8293930711927594e-05, "loss": 0.8233, "step": 3802 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8292816268877845e-05, "loss": 0.7197, "step": 3803 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.829170149592566e-05, "loss": 0.7823, "step": 3804 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8290586393115383e-05, "loss": 0.9233, "step": 3805 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.828947096049138e-05, "loss": 0.9543, "step": 3806 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8288355198098017e-05, "loss": 0.8246, "step": 3807 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.828723910597969e-05, "loss": 0.7157, "step": 3808 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.828612268418079e-05, "loss": 0.8144, "step": 3809 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8285005932745735e-05, "loss": 0.8015, "step": 3810 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8283888851718957e-05, "loss": 1.0167, "step": 3811 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8282771441144884e-05, "loss": 0.8344, "step": 3812 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8281653701067977e-05, "loss": 1.0226, "step": 3813 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8280535631532696e-05, "loss": 0.8489, "step": 3814 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8279417232583524e-05, "loss": 0.9634, "step": 3815 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.827829850426495e-05, "loss": 0.8253, "step": 3816 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.827717944662148e-05, "loss": 0.8699, "step": 3817 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.827606005969763e-05, "loss": 0.919, "step": 3818 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8274940343537935e-05, "loss": 0.8779, "step": 3819 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.827382029818694e-05, "loss": 0.905, "step": 3820 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8272699923689195e-05, "loss": 0.8046, "step": 3821 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8271579220089276e-05, "loss": 0.7319, "step": 3822 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.827045818743177e-05, "loss": 0.9839, "step": 3823 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8269336825761266e-05, "loss": 0.8979, "step": 3824 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8268215135122375e-05, "loss": 0.914, "step": 3825 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8267093115559723e-05, "loss": 0.9232, "step": 3826 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.826597076711795e-05, "loss": 0.8532, "step": 3827 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8264848089841694e-05, "loss": 0.847, "step": 3828 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8263725083775622e-05, "loss": 0.8531, "step": 3829 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.826260174896441e-05, "loss": 0.7497, "step": 3830 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8261478085452747e-05, "loss": 0.8793, "step": 3831 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8260354093285334e-05, "loss": 0.7004, "step": 3832 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8259229772506884e-05, "loss": 0.938, "step": 3833 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8258105123162125e-05, "loss": 0.8406, "step": 3834 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8256980145295797e-05, "loss": 0.8023, "step": 3835 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8255854838952653e-05, "loss": 1.0332, "step": 3836 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.825472920417746e-05, "loss": 0.8538, "step": 3837 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8253603241014998e-05, "loss": 0.8802, "step": 3838 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8252476949510062e-05, "loss": 0.7825, "step": 3839 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8251350329707452e-05, "loss": 0.7746, "step": 3840 }, { "epoch": 0.69, "grad_norm": 0.0, "learning_rate": 1.8250223381651992e-05, "loss": 0.823, "step": 3841 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8249096105388514e-05, "loss": 0.8138, "step": 3842 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8247968500961863e-05, "loss": 0.859, "step": 3843 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.824684056841689e-05, "loss": 0.9935, "step": 3844 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8245712307798475e-05, "loss": 0.7992, "step": 3845 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.82445837191515e-05, "loss": 0.9491, "step": 3846 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8243454802520858e-05, "loss": 0.8271, "step": 3847 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8242325557951466e-05, "loss": 0.8825, "step": 3848 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8241195985488242e-05, "loss": 0.8076, "step": 3849 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.824006608517612e-05, "loss": 0.8798, "step": 3850 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8238935857060057e-05, "loss": 0.8558, "step": 3851 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.823780530118501e-05, "loss": 0.8014, "step": 3852 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8236674417595957e-05, "loss": 0.8811, "step": 3853 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8235543206337883e-05, "loss": 0.7352, "step": 3854 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8234411667455796e-05, "loss": 0.7236, "step": 3855 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8233279800994704e-05, "loss": 0.7737, "step": 3856 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8232147606999636e-05, "loss": 0.9569, "step": 3857 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8231015085515636e-05, "loss": 0.8734, "step": 3858 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8229882236587755e-05, "loss": 0.8971, "step": 3859 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.822874906026106e-05, "loss": 0.6815, "step": 3860 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8227615556580632e-05, "loss": 0.7464, "step": 3861 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8226481725591557e-05, "loss": 0.9949, "step": 3862 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8225347567338953e-05, "loss": 0.7947, "step": 3863 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.822421308186793e-05, "loss": 0.6848, "step": 3864 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8223078269223617e-05, "loss": 0.8462, "step": 3865 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8221943129451168e-05, "loss": 0.8614, "step": 3866 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8220807662595737e-05, "loss": 1.0126, "step": 3867 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8219671868702495e-05, "loss": 0.7067, "step": 3868 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8218535747816618e-05, "loss": 0.7768, "step": 3869 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.821739929998332e-05, "loss": 0.831, "step": 3870 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8216262525247793e-05, "loss": 0.8987, "step": 3871 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8215125423655268e-05, "loss": 0.8767, "step": 3872 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.821398799525098e-05, "loss": 0.7406, "step": 3873 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8212850240080186e-05, "loss": 0.8181, "step": 3874 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8211712158188133e-05, "loss": 0.8476, "step": 3875 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.82105737496201e-05, "loss": 0.9149, "step": 3876 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8209435014421384e-05, "loss": 0.7003, "step": 3877 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.820829595263728e-05, "loss": 0.7369, "step": 3878 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.82071565643131e-05, "loss": 0.9008, "step": 3879 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.820601684949417e-05, "loss": 0.8476, "step": 3880 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8204876808225835e-05, "loss": 0.9502, "step": 3881 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8203736440553442e-05, "loss": 0.8278, "step": 3882 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.820259574652236e-05, "loss": 0.8377, "step": 3883 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8201454726177967e-05, "loss": 0.8493, "step": 3884 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8200313379565657e-05, "loss": 0.7992, "step": 3885 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.819917170673083e-05, "loss": 0.6871, "step": 3886 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.819802970771891e-05, "loss": 0.9071, "step": 3887 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8196887382575322e-05, "loss": 0.691, "step": 3888 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8195744731345516e-05, "loss": 0.8008, "step": 3889 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.819460175407494e-05, "loss": 0.8428, "step": 3890 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.819345845080907e-05, "loss": 0.8746, "step": 3891 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8192314821593387e-05, "loss": 0.8806, "step": 3892 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8191170866473384e-05, "loss": 0.8716, "step": 3893 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8190026585494572e-05, "loss": 0.853, "step": 3894 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.8188881978702473e-05, "loss": 0.8598, "step": 3895 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 1.818773704614262e-05, "loss": 0.7648, "step": 3896 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8186591787860562e-05, "loss": 0.8811, "step": 3897 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8185446203901857e-05, "loss": 0.7628, "step": 3898 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8184300294312082e-05, "loss": 0.91, "step": 3899 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.818315405913682e-05, "loss": 0.9268, "step": 3900 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8182007498421666e-05, "loss": 0.7873, "step": 3901 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8180860612212244e-05, "loss": 0.7967, "step": 3902 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.817971340055417e-05, "loss": 0.9831, "step": 3903 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8178565863493084e-05, "loss": 0.9257, "step": 3904 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8177418001074637e-05, "loss": 0.7602, "step": 3905 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8176269813344495e-05, "loss": 0.7713, "step": 3906 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.817512130034833e-05, "loss": 0.9219, "step": 3907 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8173972462131834e-05, "loss": 0.8668, "step": 3908 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8172823298740714e-05, "loss": 0.9155, "step": 3909 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8171673810220682e-05, "loss": 0.89, "step": 3910 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8170523996617466e-05, "loss": 0.6819, "step": 3911 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8169373857976808e-05, "loss": 0.8468, "step": 3912 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8168223394344465e-05, "loss": 0.9183, "step": 3913 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.81670726057662e-05, "loss": 0.707, "step": 3914 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8165921492287803e-05, "loss": 0.7721, "step": 3915 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.816477005395505e-05, "loss": 0.792, "step": 3916 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8163618290813762e-05, "loss": 0.8474, "step": 3917 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8162466202909753e-05, "loss": 0.8112, "step": 3918 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8161313790288856e-05, "loss": 0.8861, "step": 3919 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8160161052996913e-05, "loss": 0.8018, "step": 3920 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8159007991079786e-05, "loss": 0.7272, "step": 3921 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.815785460458334e-05, "loss": 0.8313, "step": 3922 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.815670089355347e-05, "loss": 0.8184, "step": 3923 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8155546858036063e-05, "loss": 0.8823, "step": 3924 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8154392498077028e-05, "loss": 0.7473, "step": 3925 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.815323781372229e-05, "loss": 0.7333, "step": 3926 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8152082805017785e-05, "loss": 0.8747, "step": 3927 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8150927472009462e-05, "loss": 0.7799, "step": 3928 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8149771814743283e-05, "loss": 0.8449, "step": 3929 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8148615833265215e-05, "loss": 0.8916, "step": 3930 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.814745952762125e-05, "loss": 1.0093, "step": 3931 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8146302897857392e-05, "loss": 1.0179, "step": 3932 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.814514594401965e-05, "loss": 0.7945, "step": 3933 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8143988666154044e-05, "loss": 0.9298, "step": 3934 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.814283106430662e-05, "loss": 0.7635, "step": 3935 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8141673138523428e-05, "loss": 0.786, "step": 3936 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8140514888850525e-05, "loss": 0.7889, "step": 3937 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8139356315334002e-05, "loss": 0.8404, "step": 3938 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8138197418019938e-05, "loss": 0.9825, "step": 3939 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.813703819695444e-05, "loss": 0.8697, "step": 3940 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.813587865218362e-05, "loss": 0.8589, "step": 3941 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8134718783753613e-05, "loss": 0.8256, "step": 3942 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8133558591710553e-05, "loss": 0.7911, "step": 3943 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8132398076100605e-05, "loss": 0.8753, "step": 3944 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8131237236969927e-05, "loss": 0.857, "step": 3945 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.81300760743647e-05, "loss": 0.7603, "step": 3946 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.812891458833112e-05, "loss": 0.7719, "step": 3947 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8127752778915396e-05, "loss": 0.7818, "step": 3948 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.812659064616374e-05, "loss": 0.6434, "step": 3949 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.8125428190122384e-05, "loss": 0.7914, "step": 3950 }, { "epoch": 0.71, "grad_norm": 0.0, "learning_rate": 1.812426541083758e-05, "loss": 0.704, "step": 3951 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8123102308355576e-05, "loss": 0.7428, "step": 3952 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.812193888272265e-05, "loss": 0.8234, "step": 3953 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8120775133985077e-05, "loss": 0.9017, "step": 3954 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.811961106218916e-05, "loss": 0.8553, "step": 3955 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8118446667381208e-05, "loss": 0.7787, "step": 3956 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8117281949607534e-05, "loss": 0.7989, "step": 3957 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8116116908914482e-05, "loss": 0.9258, "step": 3958 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8114951545348398e-05, "loss": 0.9027, "step": 3959 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8113785858955636e-05, "loss": 0.7423, "step": 3960 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8112619849782574e-05, "loss": 0.8725, "step": 3961 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8111453517875597e-05, "loss": 0.772, "step": 3962 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8110286863281103e-05, "loss": 0.9412, "step": 3963 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8109119886045504e-05, "loss": 0.9634, "step": 3964 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8107952586215225e-05, "loss": 0.8083, "step": 3965 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.81067849638367e-05, "loss": 0.8979, "step": 3966 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8105617018956385e-05, "loss": 0.7756, "step": 3967 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.810444875162074e-05, "loss": 0.7223, "step": 3968 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8103280161876235e-05, "loss": 0.7342, "step": 3969 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8102111249769366e-05, "loss": 0.743, "step": 3970 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.810094201534663e-05, "loss": 0.8765, "step": 3971 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8099772458654547e-05, "loss": 0.7863, "step": 3972 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8098602579739638e-05, "loss": 0.8708, "step": 3973 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8097432378648445e-05, "loss": 0.8815, "step": 3974 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.809626185542752e-05, "loss": 0.6879, "step": 3975 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.809509101012343e-05, "loss": 0.713, "step": 3976 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8093919842782748e-05, "loss": 0.7552, "step": 3977 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.809274835345207e-05, "loss": 0.8163, "step": 3978 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8091576542178e-05, "loss": 0.7699, "step": 3979 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8090404409007155e-05, "loss": 0.684, "step": 3980 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8089231953986162e-05, "loss": 0.8557, "step": 3981 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8088059177161662e-05, "loss": 0.98, "step": 3982 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8086886078580317e-05, "loss": 0.7488, "step": 3983 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8085712658288787e-05, "loss": 0.7881, "step": 3984 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8084538916333754e-05, "loss": 0.7345, "step": 3985 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8083364852761917e-05, "loss": 0.909, "step": 3986 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.808219046761998e-05, "loss": 0.7032, "step": 3987 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8081015760954653e-05, "loss": 0.851, "step": 3988 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.807984073281268e-05, "loss": 0.7374, "step": 3989 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8078665383240804e-05, "loss": 0.7912, "step": 3990 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8077489712285778e-05, "loss": 0.8916, "step": 3991 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.807631371999437e-05, "loss": 0.9748, "step": 3992 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.807513740641337e-05, "loss": 1.0453, "step": 3993 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.807396077158957e-05, "loss": 0.8731, "step": 3994 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.807278381556978e-05, "loss": 0.8731, "step": 3995 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8071606538400817e-05, "loss": 0.993, "step": 3996 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8070428940129524e-05, "loss": 0.8809, "step": 3997 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.806925102080274e-05, "loss": 0.7082, "step": 3998 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8068072780467326e-05, "loss": 0.959, "step": 3999 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8066894219170156e-05, "loss": 0.8687, "step": 4000 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8065715336958112e-05, "loss": 0.8082, "step": 4001 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.80645361338781e-05, "loss": 0.9465, "step": 4002 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8063356609977018e-05, "loss": 0.8011, "step": 4003 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8062176765301802e-05, "loss": 0.6933, "step": 4004 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8060996599899383e-05, "loss": 0.7999, "step": 4005 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8059816113816708e-05, "loss": 0.8639, "step": 4006 }, { "epoch": 0.72, "grad_norm": 0.0, "learning_rate": 1.8058635307100742e-05, "loss": 0.7637, "step": 4007 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8057454179798456e-05, "loss": 0.7885, "step": 4008 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8056272731956842e-05, "loss": 0.8758, "step": 4009 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8055090963622898e-05, "loss": 0.8511, "step": 4010 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8053908874843636e-05, "loss": 0.8331, "step": 4011 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.805272646566608e-05, "loss": 0.7576, "step": 4012 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8051543736137272e-05, "loss": 0.8828, "step": 4013 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.805036068630426e-05, "loss": 0.7703, "step": 4014 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.804917731621411e-05, "loss": 0.7806, "step": 4015 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8047993625913897e-05, "loss": 0.8193, "step": 4016 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.804680961545071e-05, "loss": 0.7839, "step": 4017 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.804562528487165e-05, "loss": 0.7827, "step": 4018 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8044440634223836e-05, "loss": 0.8571, "step": 4019 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.804325566355439e-05, "loss": 0.7097, "step": 4020 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.804207037291046e-05, "loss": 0.7214, "step": 4021 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.804088476233919e-05, "loss": 0.8003, "step": 4022 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8039698831887748e-05, "loss": 0.7559, "step": 4023 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8038512581603314e-05, "loss": 0.7203, "step": 4024 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8037326011533084e-05, "loss": 0.7381, "step": 4025 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8036139121724247e-05, "loss": 0.7957, "step": 4026 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8034951912224037e-05, "loss": 0.8062, "step": 4027 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.803376438307967e-05, "loss": 0.7384, "step": 4028 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.80325765343384e-05, "loss": 0.8267, "step": 4029 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8031388366047472e-05, "loss": 0.9121, "step": 4030 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8030199878254154e-05, "loss": 0.7259, "step": 4031 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8029011071005728e-05, "loss": 0.739, "step": 4032 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8027821944349492e-05, "loss": 0.7746, "step": 4033 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.802663249833274e-05, "loss": 0.843, "step": 4034 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.80254427330028e-05, "loss": 0.8202, "step": 4035 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8024252648407002e-05, "loss": 0.8899, "step": 4036 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8023062244592686e-05, "loss": 0.6116, "step": 4037 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.802187152160721e-05, "loss": 0.9636, "step": 4038 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.802068047949794e-05, "loss": 0.9272, "step": 4039 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8019489118312263e-05, "loss": 0.8778, "step": 4040 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8018297438097568e-05, "loss": 0.9528, "step": 4041 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8017105438901266e-05, "loss": 0.7889, "step": 4042 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8015913120770778e-05, "loss": 0.7769, "step": 4043 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.801472048375353e-05, "loss": 0.8496, "step": 4044 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8013527527896974e-05, "loss": 0.8452, "step": 4045 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8012334253248567e-05, "loss": 0.7762, "step": 4046 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8011140659855773e-05, "loss": 0.712, "step": 4047 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8009946747766084e-05, "loss": 0.7922, "step": 4048 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8008752517026988e-05, "loss": 0.8854, "step": 4049 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8007557967685997e-05, "loss": 0.9908, "step": 4050 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8006363099790635e-05, "loss": 0.9728, "step": 4051 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.800516791338843e-05, "loss": 0.8533, "step": 4052 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8003972408526934e-05, "loss": 0.8469, "step": 4053 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.8002776585253702e-05, "loss": 0.754, "step": 4054 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.800158044361631e-05, "loss": 0.7717, "step": 4055 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.800038398366234e-05, "loss": 1.012, "step": 4056 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.799918720543939e-05, "loss": 0.8594, "step": 4057 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.799799010899507e-05, "loss": 0.7394, "step": 4058 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.7996792694377002e-05, "loss": 0.8089, "step": 4059 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.799559496163282e-05, "loss": 0.6965, "step": 4060 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.7994396910810175e-05, "loss": 0.8033, "step": 4061 }, { "epoch": 0.73, "grad_norm": 0.0, "learning_rate": 1.7993198541956725e-05, "loss": 0.8099, "step": 4062 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7991999855120147e-05, "loss": 1.0235, "step": 4063 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.799080085034812e-05, "loss": 0.7853, "step": 4064 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7989601527688344e-05, "loss": 0.8921, "step": 4065 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7988401887188534e-05, "loss": 0.8695, "step": 4066 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7987201928896414e-05, "loss": 0.8521, "step": 4067 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7986001652859717e-05, "loss": 0.9166, "step": 4068 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7984801059126193e-05, "loss": 0.9574, "step": 4069 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.798360014774361e-05, "loss": 0.7827, "step": 4070 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7982398918759728e-05, "loss": 0.8121, "step": 4071 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7981197372222342e-05, "loss": 0.7591, "step": 4072 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.797999550817926e-05, "loss": 0.8677, "step": 4073 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7978793326678282e-05, "loss": 0.7327, "step": 4074 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7977590827767235e-05, "loss": 0.9746, "step": 4075 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7976388011493963e-05, "loss": 0.8691, "step": 4076 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.797518487790631e-05, "loss": 0.7811, "step": 4077 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7973981427052137e-05, "loss": 0.8769, "step": 4078 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7972777658979326e-05, "loss": 0.7889, "step": 4079 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.797157357373576e-05, "loss": 0.8735, "step": 4080 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.797036917136934e-05, "loss": 0.7966, "step": 4081 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7969164451927984e-05, "loss": 0.723, "step": 4082 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7967959415459614e-05, "loss": 0.9994, "step": 4083 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7966754062012163e-05, "loss": 0.9091, "step": 4084 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7965548391633595e-05, "loss": 0.8509, "step": 4085 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.796434240437186e-05, "loss": 0.9756, "step": 4086 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7963136100274944e-05, "loss": 0.9206, "step": 4087 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.796192947939083e-05, "loss": 0.8464, "step": 4088 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7960722541767527e-05, "loss": 0.6776, "step": 4089 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.795951528745304e-05, "loss": 0.76, "step": 4090 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7958307716495402e-05, "loss": 0.8904, "step": 4091 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7957099828942643e-05, "loss": 0.8959, "step": 4092 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7955891624842826e-05, "loss": 0.8237, "step": 4093 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.795468310424401e-05, "loss": 0.806, "step": 4094 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7953474267194276e-05, "loss": 0.897, "step": 4095 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7952265113741707e-05, "loss": 0.8157, "step": 4096 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.795105564393441e-05, "loss": 0.885, "step": 4097 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.79498458578205e-05, "loss": 0.8462, "step": 4098 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.79486357554481e-05, "loss": 0.8822, "step": 4099 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7947425336865358e-05, "loss": 0.9384, "step": 4100 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7946214602120417e-05, "loss": 0.8528, "step": 4101 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7945003551261446e-05, "loss": 0.9119, "step": 4102 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7943792184336627e-05, "loss": 0.9338, "step": 4103 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.794258050139414e-05, "loss": 0.7523, "step": 4104 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.79413685024822e-05, "loss": 0.8618, "step": 4105 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7940156187649017e-05, "loss": 0.7584, "step": 4106 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7938943556942817e-05, "loss": 0.9085, "step": 4107 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7937730610411842e-05, "loss": 0.8688, "step": 4108 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7936517348104344e-05, "loss": 0.9354, "step": 4109 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7935303770068592e-05, "loss": 0.7373, "step": 4110 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.793408987635286e-05, "loss": 0.8451, "step": 4111 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7932875667005443e-05, "loss": 0.9834, "step": 4112 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.793166114207464e-05, "loss": 0.8635, "step": 4113 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.793044630160877e-05, "loss": 0.9765, "step": 4114 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.7929231145656157e-05, "loss": 0.8193, "step": 4115 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.792801567426515e-05, "loss": 0.8286, "step": 4116 }, { "epoch": 0.74, "grad_norm": 0.0, "learning_rate": 1.79267998874841e-05, "loss": 0.7292, "step": 4117 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.792558378536137e-05, "loss": 0.7104, "step": 4118 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7924367367945336e-05, "loss": 0.9176, "step": 4119 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7923150635284395e-05, "loss": 0.8112, "step": 4120 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.792193358742695e-05, "loss": 0.8305, "step": 4121 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7920716224421413e-05, "loss": 0.7605, "step": 4122 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7919498546316215e-05, "loss": 0.9753, "step": 4123 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7918280553159802e-05, "loss": 0.8726, "step": 4124 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7917062245000622e-05, "loss": 1.0205, "step": 4125 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7915843621887144e-05, "loss": 0.7908, "step": 4126 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7914624683867844e-05, "loss": 0.793, "step": 4127 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7913405430991218e-05, "loss": 0.7884, "step": 4128 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7912185863305765e-05, "loss": 0.7373, "step": 4129 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7910965980860005e-05, "loss": 0.7928, "step": 4130 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7909745783702468e-05, "loss": 0.8438, "step": 4131 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.790852527188169e-05, "loss": 0.7358, "step": 4132 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7907304445446233e-05, "loss": 0.7326, "step": 4133 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7906083304444656e-05, "loss": 0.8443, "step": 4134 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7904861848925543e-05, "loss": 0.7872, "step": 4135 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7903640078937482e-05, "loss": 0.8848, "step": 4136 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.790241799452908e-05, "loss": 0.8355, "step": 4137 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.790119559574895e-05, "loss": 0.6813, "step": 4138 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7899972882645727e-05, "loss": 0.7788, "step": 4139 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7898749855268048e-05, "loss": 0.7989, "step": 4140 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.789752651366457e-05, "loss": 0.8423, "step": 4141 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7896302857883957e-05, "loss": 0.9608, "step": 4142 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7895078887974892e-05, "loss": 0.8482, "step": 4143 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.789385460398606e-05, "loss": 0.7877, "step": 4144 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.789263000596617e-05, "loss": 0.8957, "step": 4145 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.789140509396394e-05, "loss": 0.837, "step": 4146 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.789017986802809e-05, "loss": 0.9506, "step": 4147 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7888954328207376e-05, "loss": 0.6315, "step": 4148 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.788772847455054e-05, "loss": 0.8149, "step": 4149 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.788650230710635e-05, "loss": 0.8408, "step": 4150 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7885275825923597e-05, "loss": 0.8577, "step": 4151 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7884049031051053e-05, "loss": 0.8106, "step": 4152 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.788282192253754e-05, "loss": 0.8212, "step": 4153 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.788159450043186e-05, "loss": 0.8899, "step": 4154 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7880366764782856e-05, "loss": 0.8675, "step": 4155 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.787913871563936e-05, "loss": 0.7447, "step": 4156 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.787791035305023e-05, "loss": 0.8066, "step": 4157 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7876681677064323e-05, "loss": 0.8143, "step": 4158 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7875452687730533e-05, "loss": 0.7119, "step": 4159 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.787422338509774e-05, "loss": 0.8476, "step": 4160 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7872993769214852e-05, "loss": 0.8438, "step": 4161 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7871763840130787e-05, "loss": 0.9471, "step": 4162 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.787053359789447e-05, "loss": 0.8313, "step": 4163 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7869303042554844e-05, "loss": 0.7023, "step": 4164 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.786807217416086e-05, "loss": 0.7892, "step": 4165 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.786684099276149e-05, "loss": 0.7916, "step": 4166 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7865609498405704e-05, "loss": 0.9333, "step": 4167 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.78643776911425e-05, "loss": 0.7566, "step": 4168 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7863145571020876e-05, "loss": 0.7415, "step": 4169 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.786191313808986e-05, "loss": 0.8451, "step": 4170 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.786068039239846e-05, "loss": 0.9973, "step": 4171 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7859447333995736e-05, "loss": 0.9172, "step": 4172 }, { "epoch": 0.75, "grad_norm": 0.0, "learning_rate": 1.7858213962930727e-05, "loss": 0.9312, "step": 4173 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7856980279252506e-05, "loss": 0.7594, "step": 4174 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7855746283010152e-05, "loss": 0.8207, "step": 4175 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.785451197425275e-05, "loss": 0.9499, "step": 4176 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.785327735302941e-05, "loss": 0.8564, "step": 4177 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7852042419389244e-05, "loss": 0.8024, "step": 4178 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7850807173381377e-05, "loss": 0.8561, "step": 4179 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7849571615054953e-05, "loss": 0.7455, "step": 4180 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.784833574445912e-05, "loss": 0.9736, "step": 4181 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.784709956164305e-05, "loss": 1.0303, "step": 4182 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7845863066655913e-05, "loss": 0.774, "step": 4183 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7844626259546904e-05, "loss": 0.9232, "step": 4184 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.784338914036522e-05, "loss": 0.8516, "step": 4185 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7842151709160086e-05, "loss": 0.8379, "step": 4186 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.784091396598072e-05, "loss": 0.7531, "step": 4187 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7839675910876365e-05, "loss": 0.7477, "step": 4188 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.783843754389627e-05, "loss": 0.8584, "step": 4189 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7837198865089705e-05, "loss": 0.8945, "step": 4190 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.783595987450594e-05, "loss": 0.7913, "step": 4191 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7834720572194268e-05, "loss": 0.8351, "step": 4192 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.783348095820399e-05, "loss": 0.7626, "step": 4193 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.783224103258442e-05, "loss": 0.9631, "step": 4194 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.783100079538489e-05, "loss": 0.7379, "step": 4195 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7829760246654728e-05, "loss": 1.0136, "step": 4196 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7828519386443287e-05, "loss": 0.8101, "step": 4197 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.782727821479994e-05, "loss": 0.824, "step": 4198 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7826036731774054e-05, "loss": 0.9134, "step": 4199 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7824794937415018e-05, "loss": 0.8569, "step": 4200 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7823552831772234e-05, "loss": 0.8051, "step": 4201 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.782231041489512e-05, "loss": 0.8862, "step": 4202 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.78210676868331e-05, "loss": 0.7645, "step": 4203 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7819824647635602e-05, "loss": 0.9101, "step": 4204 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7818581297352086e-05, "loss": 0.9278, "step": 4205 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7817337636032013e-05, "loss": 0.8373, "step": 4206 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7816093663724858e-05, "loss": 0.8428, "step": 4207 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7814849380480103e-05, "loss": 0.9761, "step": 4208 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7813604786347257e-05, "loss": 0.8951, "step": 4209 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7812359881375824e-05, "loss": 0.7574, "step": 4210 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7811114665615334e-05, "loss": 0.7669, "step": 4211 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7809869139115318e-05, "loss": 0.7568, "step": 4212 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7808623301925335e-05, "loss": 0.7958, "step": 4213 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7807377154094933e-05, "loss": 0.8339, "step": 4214 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7806130695673697e-05, "loss": 0.8706, "step": 4215 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.780488392671121e-05, "loss": 0.831, "step": 4216 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7803636847257067e-05, "loss": 0.8366, "step": 4217 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7802389457360882e-05, "loss": 0.812, "step": 4218 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.780114175707228e-05, "loss": 0.7901, "step": 4219 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7799893746440893e-05, "loss": 0.8599, "step": 4220 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7798645425516368e-05, "loss": 0.7927, "step": 4221 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7797396794348375e-05, "loss": 0.9057, "step": 4222 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7796147852986575e-05, "loss": 0.7742, "step": 4223 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.779489860148066e-05, "loss": 0.9358, "step": 4224 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7793649039880326e-05, "loss": 0.8337, "step": 4225 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.779239916823528e-05, "loss": 0.8435, "step": 4226 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.7791148986595247e-05, "loss": 0.9031, "step": 4227 }, { "epoch": 0.76, "grad_norm": 0.0, "learning_rate": 1.778989849500996e-05, "loss": 0.8793, "step": 4228 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7788647693529167e-05, "loss": 0.841, "step": 4229 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7787396582202625e-05, "loss": 0.8241, "step": 4230 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7786145161080105e-05, "loss": 0.7567, "step": 4231 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7784893430211396e-05, "loss": 0.7638, "step": 4232 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7783641389646288e-05, "loss": 0.8574, "step": 4233 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.778238903943459e-05, "loss": 0.7684, "step": 4234 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7781136379626127e-05, "loss": 0.874, "step": 4235 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7779883410270727e-05, "loss": 0.7851, "step": 4236 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7778630131418237e-05, "loss": 0.7871, "step": 4237 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7777376543118513e-05, "loss": 0.9193, "step": 4238 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.777612264542143e-05, "loss": 0.8527, "step": 4239 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7774868438376868e-05, "loss": 0.7901, "step": 4240 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7773613922034717e-05, "loss": 0.8194, "step": 4241 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.777235909644489e-05, "loss": 0.8104, "step": 4242 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.77711039616573e-05, "loss": 0.9334, "step": 4243 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7769848517721884e-05, "loss": 0.8099, "step": 4244 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.776859276468858e-05, "loss": 0.84, "step": 4245 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7767336702607352e-05, "loss": 0.923, "step": 4246 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7766080331528165e-05, "loss": 0.9512, "step": 4247 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7764823651500995e-05, "loss": 0.8471, "step": 4248 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7763566662575837e-05, "loss": 0.8532, "step": 4249 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.77623093648027e-05, "loss": 0.7229, "step": 4250 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.77610517582316e-05, "loss": 0.781, "step": 4251 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7759793842912562e-05, "loss": 0.8635, "step": 4252 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7758535618895636e-05, "loss": 0.9969, "step": 4253 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.775727708623087e-05, "loss": 0.9744, "step": 4254 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7756018244968332e-05, "loss": 0.6931, "step": 4255 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.77547590951581e-05, "loss": 0.8345, "step": 4256 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.775349963685027e-05, "loss": 0.751, "step": 4257 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.775223987009494e-05, "loss": 0.8347, "step": 4258 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7750979794942226e-05, "loss": 0.7431, "step": 4259 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7749719411442262e-05, "loss": 0.7326, "step": 4260 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7748458719645182e-05, "loss": 0.9303, "step": 4261 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7747197719601137e-05, "loss": 0.724, "step": 4262 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7745936411360295e-05, "loss": 0.938, "step": 4263 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7744674794972837e-05, "loss": 0.7574, "step": 4264 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7743412870488946e-05, "loss": 0.7692, "step": 4265 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7742150637958827e-05, "loss": 0.7395, "step": 4266 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7740888097432694e-05, "loss": 0.8537, "step": 4267 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.773962524896077e-05, "loss": 0.7987, "step": 4268 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7738362092593297e-05, "loss": 0.8751, "step": 4269 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7737098628380517e-05, "loss": 0.6827, "step": 4270 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7735834856372705e-05, "loss": 0.8968, "step": 4271 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.773457077662013e-05, "loss": 0.926, "step": 4272 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7733306389173077e-05, "loss": 0.6403, "step": 4273 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7732041694081848e-05, "loss": 0.831, "step": 4274 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7730776691396756e-05, "loss": 0.8377, "step": 4275 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7729511381168123e-05, "loss": 0.8251, "step": 4276 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7728245763446286e-05, "loss": 0.8132, "step": 4277 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7726979838281593e-05, "loss": 0.7137, "step": 4278 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7725713605724405e-05, "loss": 0.8772, "step": 4279 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7724447065825097e-05, "loss": 0.7788, "step": 4280 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.772318021863405e-05, "loss": 0.8488, "step": 4281 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.7721913064201665e-05, "loss": 0.8953, "step": 4282 }, { "epoch": 0.77, "grad_norm": 0.0, "learning_rate": 1.772064560257835e-05, "loss": 0.8002, "step": 4283 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7719377833814523e-05, "loss": 0.868, "step": 4284 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7718109757960623e-05, "loss": 0.7172, "step": 4285 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.77168413750671e-05, "loss": 0.9488, "step": 4286 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7715572685184403e-05, "loss": 0.7998, "step": 4287 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7714303688363008e-05, "loss": 0.8268, "step": 4288 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7713034384653393e-05, "loss": 0.9049, "step": 4289 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.771176477410606e-05, "loss": 0.8701, "step": 4290 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7710494856771515e-05, "loss": 0.8784, "step": 4291 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7709224632700275e-05, "loss": 0.7835, "step": 4292 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7707954101942873e-05, "loss": 0.8199, "step": 4293 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.770668326454985e-05, "loss": 0.6953, "step": 4294 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7705412120571767e-05, "loss": 0.7265, "step": 4295 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7704140670059187e-05, "loss": 0.909, "step": 4296 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7702868913062698e-05, "loss": 0.8772, "step": 4297 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7701596849632883e-05, "loss": 0.7741, "step": 4298 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7700324479820352e-05, "loss": 0.8474, "step": 4299 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7699051803675723e-05, "loss": 0.8292, "step": 4300 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7697778821249625e-05, "loss": 0.7808, "step": 4301 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.76965055325927e-05, "loss": 0.7342, "step": 4302 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7695231937755598e-05, "loss": 0.8716, "step": 4303 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7693958036788984e-05, "loss": 0.8141, "step": 4304 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7692683829743546e-05, "loss": 0.7471, "step": 4305 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7691409316669958e-05, "loss": 0.8146, "step": 4306 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.769013449761894e-05, "loss": 0.8211, "step": 4307 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.768885937264119e-05, "loss": 0.9148, "step": 4308 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7687583941787447e-05, "loss": 0.805, "step": 4309 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.768630820510844e-05, "loss": 0.8652, "step": 4310 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7685032162654932e-05, "loss": 0.7143, "step": 4311 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.768375581447768e-05, "loss": 0.8072, "step": 4312 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7682479160627454e-05, "loss": 0.7891, "step": 4313 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7681202201155046e-05, "loss": 0.7931, "step": 4314 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7679924936111258e-05, "loss": 0.8252, "step": 4315 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.76786473655469e-05, "loss": 0.785, "step": 4316 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7677369489512795e-05, "loss": 0.9809, "step": 4317 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7676091308059778e-05, "loss": 0.9059, "step": 4318 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7674812821238702e-05, "loss": 0.8522, "step": 4319 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7673534029100422e-05, "loss": 0.8091, "step": 4320 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7672254931695812e-05, "loss": 0.818, "step": 4321 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.767097552907576e-05, "loss": 0.7786, "step": 4322 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7669695821291156e-05, "loss": 0.8937, "step": 4323 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7668415808392916e-05, "loss": 0.7908, "step": 4324 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7667135490431963e-05, "loss": 0.8353, "step": 4325 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.766585486745922e-05, "loss": 0.7032, "step": 4326 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.766457393952564e-05, "loss": 0.9274, "step": 4327 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.766329270668218e-05, "loss": 0.701, "step": 4328 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7662011168979805e-05, "loss": 0.8609, "step": 4329 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.76607293264695e-05, "loss": 0.8237, "step": 4330 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.765944717920226e-05, "loss": 0.8174, "step": 4331 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.765816472722909e-05, "loss": 0.7341, "step": 4332 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.765688197060101e-05, "loss": 0.7094, "step": 4333 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7655598909369045e-05, "loss": 0.895, "step": 4334 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7654315543584243e-05, "loss": 0.8001, "step": 4335 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7653031873297653e-05, "loss": 0.8616, "step": 4336 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7651747898560344e-05, "loss": 0.8159, "step": 4337 }, { "epoch": 0.78, "grad_norm": 0.0, "learning_rate": 1.7650463619423402e-05, "loss": 0.9342, "step": 4338 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7649179035937907e-05, "loss": 0.7576, "step": 4339 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7647894148154968e-05, "loss": 0.8352, "step": 4340 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7646608956125695e-05, "loss": 0.8582, "step": 4341 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.764532345990122e-05, "loss": 1.0371, "step": 4342 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.764403765953268e-05, "loss": 0.8442, "step": 4343 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.764275155507123e-05, "loss": 0.9458, "step": 4344 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7641465146568025e-05, "loss": 0.796, "step": 4345 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.764017843407425e-05, "loss": 0.8304, "step": 4346 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7638891417641087e-05, "loss": 0.8904, "step": 4347 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7637604097319738e-05, "loss": 0.8127, "step": 4348 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7636316473161413e-05, "loss": 0.8538, "step": 4349 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7635028545217337e-05, "loss": 0.758, "step": 4350 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.763374031353875e-05, "loss": 0.9126, "step": 4351 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.763245177817689e-05, "loss": 0.854, "step": 4352 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7631162939183025e-05, "loss": 0.795, "step": 4353 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7629873796608427e-05, "loss": 0.972, "step": 4354 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7628584350504377e-05, "loss": 0.766, "step": 4355 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7627294600922175e-05, "loss": 0.8483, "step": 4356 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7626004547913126e-05, "loss": 0.7892, "step": 4357 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7624714191528555e-05, "loss": 0.781, "step": 4358 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7623423531819788e-05, "loss": 0.6228, "step": 4359 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7622132568838172e-05, "loss": 0.7661, "step": 4360 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7620841302635068e-05, "loss": 0.8557, "step": 4361 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.761954973326184e-05, "loss": 0.8164, "step": 4362 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7618257860769873e-05, "loss": 0.862, "step": 4363 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7616965685210554e-05, "loss": 0.7936, "step": 4364 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7615673206635294e-05, "loss": 0.7539, "step": 4365 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7614380425095506e-05, "loss": 0.7877, "step": 4366 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.761308734064262e-05, "loss": 0.8062, "step": 4367 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.761179395332808e-05, "loss": 0.8375, "step": 4368 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.761050026320334e-05, "loss": 0.9692, "step": 4369 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7609206270319856e-05, "loss": 0.8819, "step": 4370 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.760791197472911e-05, "loss": 0.8258, "step": 4371 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.76066173764826e-05, "loss": 0.7062, "step": 4372 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7605322475631815e-05, "loss": 0.8567, "step": 4373 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7604027272228275e-05, "loss": 0.8513, "step": 4374 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7602731766323502e-05, "loss": 0.7189, "step": 4375 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7601435957969035e-05, "loss": 0.7078, "step": 4376 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7600139847216426e-05, "loss": 0.7033, "step": 4377 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7598843434117233e-05, "loss": 0.9461, "step": 4378 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.759754671872303e-05, "loss": 0.8879, "step": 4379 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7596249701085403e-05, "loss": 0.8571, "step": 4380 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.759495238125595e-05, "loss": 0.7144, "step": 4381 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.759365475928628e-05, "loss": 0.7564, "step": 4382 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7592356835228016e-05, "loss": 0.9254, "step": 4383 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.759105860913279e-05, "loss": 0.8069, "step": 4384 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7589760081052247e-05, "loss": 1.0285, "step": 4385 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.758846125103805e-05, "loss": 0.7339, "step": 4386 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.758716211914186e-05, "loss": 0.7477, "step": 4387 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7585862685415368e-05, "loss": 0.8427, "step": 4388 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.758456294991026e-05, "loss": 0.8991, "step": 4389 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7583262912678246e-05, "loss": 0.9129, "step": 4390 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.758196257377104e-05, "loss": 0.8637, "step": 4391 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.758066193324038e-05, "loss": 0.8454, "step": 4392 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.7579360991137995e-05, "loss": 0.8539, "step": 4393 }, { "epoch": 0.79, "grad_norm": 0.0, "learning_rate": 1.757805974751565e-05, "loss": 0.785, "step": 4394 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7576758202425105e-05, "loss": 0.8915, "step": 4395 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.757545635591814e-05, "loss": 0.7534, "step": 4396 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.757415420804654e-05, "loss": 0.6986, "step": 4397 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7572851758862115e-05, "loss": 0.8279, "step": 4398 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7571549008416672e-05, "loss": 0.6752, "step": 4399 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.757024595676204e-05, "loss": 0.7399, "step": 4400 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7568942603950052e-05, "loss": 0.7621, "step": 4401 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7567638950032565e-05, "loss": 0.8912, "step": 4402 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7566334995061433e-05, "loss": 0.8132, "step": 4403 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7565030739088533e-05, "loss": 0.8814, "step": 4404 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7563726182165752e-05, "loss": 0.8116, "step": 4405 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.756242132434499e-05, "loss": 0.8107, "step": 4406 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7561116165678144e-05, "loss": 0.7662, "step": 4407 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.755981070621715e-05, "loss": 0.918, "step": 4408 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7558504946013933e-05, "loss": 0.7804, "step": 4409 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7557198885120444e-05, "loss": 0.8303, "step": 4410 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7555892523588636e-05, "loss": 0.8069, "step": 4411 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.755458586147048e-05, "loss": 0.875, "step": 4412 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7553278898817953e-05, "loss": 0.8886, "step": 4413 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7551971635683053e-05, "loss": 0.9428, "step": 4414 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7550664072117787e-05, "loss": 0.8863, "step": 4415 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7549356208174167e-05, "loss": 0.8053, "step": 4416 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7548048043904225e-05, "loss": 0.7938, "step": 4417 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7546739579359998e-05, "loss": 0.7687, "step": 4418 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.754543081459355e-05, "loss": 0.8365, "step": 4419 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.754412174965693e-05, "loss": 0.6624, "step": 4420 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.754281238460223e-05, "loss": 0.7544, "step": 4421 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7541502719481525e-05, "loss": 0.8091, "step": 4422 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7540192754346926e-05, "loss": 0.8737, "step": 4423 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7538882489250543e-05, "loss": 0.813, "step": 4424 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.75375719242445e-05, "loss": 0.8328, "step": 4425 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.753626105938093e-05, "loss": 0.7299, "step": 4426 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.753494989471199e-05, "loss": 0.8631, "step": 4427 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.753363843028983e-05, "loss": 0.9, "step": 4428 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7532326666166628e-05, "loss": 0.8239, "step": 4429 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.753101460239457e-05, "loss": 0.7925, "step": 4430 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7529702239025848e-05, "loss": 0.8012, "step": 4431 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.752838957611267e-05, "loss": 0.6934, "step": 4432 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7527076613707264e-05, "loss": 0.8741, "step": 4433 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.752576335186185e-05, "loss": 0.8486, "step": 4434 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.752444979062868e-05, "loss": 0.8137, "step": 4435 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7523135930060005e-05, "loss": 0.799, "step": 4436 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.75218217702081e-05, "loss": 0.9931, "step": 4437 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7520507311125236e-05, "loss": 0.8306, "step": 4438 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7519192552863705e-05, "loss": 0.7408, "step": 4439 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7517877495475816e-05, "loss": 0.7918, "step": 4440 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7516562139013882e-05, "loss": 0.7801, "step": 4441 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.751524648353023e-05, "loss": 0.8416, "step": 4442 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7513930529077196e-05, "loss": 0.9228, "step": 4443 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7512614275707135e-05, "loss": 0.7745, "step": 4444 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.751129772347241e-05, "loss": 0.8995, "step": 4445 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.750998087242539e-05, "loss": 0.9303, "step": 4446 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.7508663722618467e-05, "loss": 0.8651, "step": 4447 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.750734627410404e-05, "loss": 0.7715, "step": 4448 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 1.750602852693452e-05, "loss": 0.7324, "step": 4449 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.750471048116232e-05, "loss": 0.7653, "step": 4450 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7503392136839884e-05, "loss": 0.8862, "step": 4451 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7502073494019653e-05, "loss": 0.8739, "step": 4452 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.750075455275409e-05, "loss": 0.8051, "step": 4453 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.749943531309566e-05, "loss": 0.9318, "step": 4454 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7498115775096843e-05, "loss": 0.7955, "step": 4455 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7496795938810137e-05, "loss": 0.8662, "step": 4456 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.749547580428805e-05, "loss": 0.7458, "step": 4457 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.749415537158309e-05, "loss": 0.9447, "step": 4458 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7492834640747793e-05, "loss": 0.9348, "step": 4459 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.74915136118347e-05, "loss": 0.9329, "step": 4460 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.749019228489636e-05, "loss": 1.0229, "step": 4461 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7488870659985337e-05, "loss": 0.8122, "step": 4462 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7487548737154213e-05, "loss": 0.8445, "step": 4463 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7486226516455575e-05, "loss": 0.7137, "step": 4464 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.748490399794202e-05, "loss": 0.7834, "step": 4465 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.748358118166616e-05, "loss": 0.8374, "step": 4466 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.748225806768062e-05, "loss": 1.1288, "step": 4467 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7480934656038036e-05, "loss": 0.7502, "step": 4468 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.747961094679106e-05, "loss": 0.8713, "step": 4469 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.747828693999234e-05, "loss": 0.8359, "step": 4470 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7476962635694563e-05, "loss": 0.8029, "step": 4471 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.74756380339504e-05, "loss": 0.9133, "step": 4472 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7474313134812546e-05, "loss": 0.8766, "step": 4473 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7472987938333715e-05, "loss": 0.8881, "step": 4474 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.747166244456662e-05, "loss": 0.8107, "step": 4475 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7470336653563998e-05, "loss": 0.8425, "step": 4476 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7469010565378584e-05, "loss": 0.8823, "step": 4477 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7467684180063133e-05, "loss": 0.794, "step": 4478 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7466357497670415e-05, "loss": 0.8607, "step": 4479 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.74650305182532e-05, "loss": 0.6144, "step": 4480 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7463703241864287e-05, "loss": 0.7977, "step": 4481 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7462375668556474e-05, "loss": 0.9418, "step": 4482 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.746104779838257e-05, "loss": 0.7581, "step": 4483 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7459719631395405e-05, "loss": 0.8046, "step": 4484 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.745839116764781e-05, "loss": 0.718, "step": 4485 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7457062407192643e-05, "loss": 0.9907, "step": 4486 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.745573335008276e-05, "loss": 0.7788, "step": 4487 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7454403996371024e-05, "loss": 0.8082, "step": 4488 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7453074346110333e-05, "loss": 0.762, "step": 4489 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7451744399353576e-05, "loss": 0.8861, "step": 4490 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7450414156153658e-05, "loss": 0.687, "step": 4491 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7449083616563504e-05, "loss": 0.8834, "step": 4492 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7447752780636045e-05, "loss": 0.9487, "step": 4493 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.744642164842422e-05, "loss": 0.8391, "step": 4494 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7445090219980985e-05, "loss": 0.7116, "step": 4495 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7443758495359303e-05, "loss": 0.7382, "step": 4496 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7442426474612163e-05, "loss": 0.7599, "step": 4497 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7441094157792543e-05, "loss": 0.9328, "step": 4498 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.743976154495345e-05, "loss": 0.8368, "step": 4499 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7438428636147905e-05, "loss": 0.9117, "step": 4500 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.743709543142892e-05, "loss": 0.9163, "step": 4501 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.743576193084954e-05, "loss": 0.8334, "step": 4502 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.743442813446281e-05, "loss": 0.829, "step": 4503 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 1.7433094042321796e-05, "loss": 0.8548, "step": 4504 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7431759654479562e-05, "loss": 0.827, "step": 4505 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7430424970989203e-05, "loss": 0.8846, "step": 4506 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7429089991903807e-05, "loss": 0.7651, "step": 4507 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7427754717276485e-05, "loss": 0.9113, "step": 4508 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.742641914716036e-05, "loss": 0.891, "step": 4509 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7425083281608555e-05, "loss": 0.8397, "step": 4510 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7423747120674218e-05, "loss": 0.7868, "step": 4511 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.74224106644105e-05, "loss": 0.8385, "step": 4512 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7421073912870573e-05, "loss": 0.801, "step": 4513 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7419736866107615e-05, "loss": 0.808, "step": 4514 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7418399524174813e-05, "loss": 0.8391, "step": 4515 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7417061887125368e-05, "loss": 0.8246, "step": 4516 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7415723955012493e-05, "loss": 0.7831, "step": 4517 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7414385727889418e-05, "loss": 0.8688, "step": 4518 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7413047205809378e-05, "loss": 0.7561, "step": 4519 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.741170838882562e-05, "loss": 0.8012, "step": 4520 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7410369276991406e-05, "loss": 0.8232, "step": 4521 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7409029870360002e-05, "loss": 0.9452, "step": 4522 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7407690168984705e-05, "loss": 0.7654, "step": 4523 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7406350172918798e-05, "loss": 0.8628, "step": 4524 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7405009882215594e-05, "loss": 0.839, "step": 4525 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7403669296928415e-05, "loss": 0.8808, "step": 4526 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7402328417110585e-05, "loss": 0.896, "step": 4527 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.740098724281545e-05, "loss": 0.7352, "step": 4528 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.739964577409636e-05, "loss": 0.853, "step": 4529 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7398304011006692e-05, "loss": 0.9232, "step": 4530 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7396961953599815e-05, "loss": 0.9663, "step": 4531 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7395619601929116e-05, "loss": 0.7958, "step": 4532 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7394276956048003e-05, "loss": 0.8871, "step": 4533 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7392934016009885e-05, "loss": 0.8495, "step": 4534 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.739159078186819e-05, "loss": 0.753, "step": 4535 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7390247253676346e-05, "loss": 0.7535, "step": 4536 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7388903431487806e-05, "loss": 0.7628, "step": 4537 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7387559315356032e-05, "loss": 0.8241, "step": 4538 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7386214905334494e-05, "loss": 0.8829, "step": 4539 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7384870201476675e-05, "loss": 0.7873, "step": 4540 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7383525203836062e-05, "loss": 0.8969, "step": 4541 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.738217991246617e-05, "loss": 0.917, "step": 4542 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7380834327420516e-05, "loss": 0.8435, "step": 4543 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.737948844875263e-05, "loss": 0.9177, "step": 4544 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.737814227651605e-05, "loss": 0.9435, "step": 4545 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.737679581076433e-05, "loss": 0.9139, "step": 4546 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7375449051551035e-05, "loss": 0.7261, "step": 4547 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7374101998929746e-05, "loss": 0.7972, "step": 4548 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7372754652954045e-05, "loss": 0.7636, "step": 4549 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7371407013677533e-05, "loss": 0.8465, "step": 4550 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.737005908115382e-05, "loss": 0.9566, "step": 4551 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7368710855436533e-05, "loss": 0.7487, "step": 4552 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7367362336579304e-05, "loss": 0.8206, "step": 4553 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7366013524635784e-05, "loss": 0.776, "step": 4554 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7364664419659626e-05, "loss": 0.7961, "step": 4555 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7363315021704496e-05, "loss": 0.9586, "step": 4556 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.7361965330824088e-05, "loss": 0.9635, "step": 4557 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.736061534707208e-05, "loss": 0.9329, "step": 4558 }, { "epoch": 0.82, "grad_norm": 0.0, "learning_rate": 1.735926507050219e-05, "loss": 0.7328, "step": 4559 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7357914501168124e-05, "loss": 0.8112, "step": 4560 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.735656363912362e-05, "loss": 0.956, "step": 4561 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7355212484422407e-05, "loss": 0.8343, "step": 4562 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.735386103711824e-05, "loss": 0.9219, "step": 4563 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7352509297264886e-05, "loss": 0.8229, "step": 4564 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7351157264916114e-05, "loss": 0.8605, "step": 4565 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7349804940125716e-05, "loss": 0.8246, "step": 4566 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7348452322947486e-05, "loss": 0.9586, "step": 4567 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7347099413435232e-05, "loss": 0.8259, "step": 4568 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.734574621164278e-05, "loss": 0.7249, "step": 4569 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.734439271762396e-05, "loss": 0.8323, "step": 4570 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.734303893143261e-05, "loss": 0.8622, "step": 4571 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.73416848531226e-05, "loss": 0.8745, "step": 4572 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7340330482747783e-05, "loss": 0.7745, "step": 4573 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.733897582036205e-05, "loss": 0.9272, "step": 4574 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7337620866019282e-05, "loss": 0.8549, "step": 4575 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7336265619773387e-05, "loss": 0.6629, "step": 4576 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.733491008167828e-05, "loss": 0.7324, "step": 4577 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7333554251787883e-05, "loss": 0.7745, "step": 4578 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7332198130156134e-05, "loss": 0.8092, "step": 4579 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7330841716836982e-05, "loss": 0.8713, "step": 4580 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7329485011884394e-05, "loss": 0.8597, "step": 4581 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.732812801535233e-05, "loss": 0.7702, "step": 4582 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.732677072729478e-05, "loss": 0.7441, "step": 4583 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7325413147765742e-05, "loss": 0.9297, "step": 4584 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.732405527681922e-05, "loss": 0.9354, "step": 4585 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.732269711450923e-05, "loss": 0.8969, "step": 4586 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7321338660889807e-05, "loss": 0.9659, "step": 4587 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7319979916014985e-05, "loss": 0.6701, "step": 4588 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7318620879938824e-05, "loss": 0.979, "step": 4589 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.731726155271539e-05, "loss": 0.827, "step": 4590 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7315901934398755e-05, "loss": 0.8232, "step": 4591 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.731454202504301e-05, "loss": 0.8919, "step": 4592 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.731318182470225e-05, "loss": 0.9167, "step": 4593 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7311821333430586e-05, "loss": 0.7737, "step": 4594 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7310460551282148e-05, "loss": 0.8515, "step": 4595 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7309099478311063e-05, "loss": 0.8077, "step": 4596 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7307738114571483e-05, "loss": 0.8275, "step": 4597 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.730637646011756e-05, "loss": 0.6685, "step": 4598 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7305014515003465e-05, "loss": 0.8296, "step": 4599 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7303652279283375e-05, "loss": 0.7603, "step": 4600 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7302289753011492e-05, "loss": 0.8463, "step": 4601 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.730092693624201e-05, "loss": 0.8099, "step": 4602 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7299563829029145e-05, "loss": 0.8054, "step": 4603 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7298200431427128e-05, "loss": 0.7178, "step": 4604 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7296836743490195e-05, "loss": 0.8137, "step": 4605 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7295472765272598e-05, "loss": 0.8782, "step": 4606 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7294108496828594e-05, "loss": 0.7741, "step": 4607 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.729274393821246e-05, "loss": 0.9325, "step": 4608 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7291379089478478e-05, "loss": 0.7406, "step": 4609 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7290013950680948e-05, "loss": 0.7216, "step": 4610 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.728864852187417e-05, "loss": 0.8383, "step": 4611 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.728728280311247e-05, "loss": 0.9867, "step": 4612 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.728591679445018e-05, "loss": 0.7011, "step": 4613 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7284550495941633e-05, "loss": 0.783, "step": 4614 }, { "epoch": 0.83, "grad_norm": 0.0, "learning_rate": 1.7283183907641188e-05, "loss": 0.688, "step": 4615 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7281817029603213e-05, "loss": 0.8559, "step": 4616 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7280449861882084e-05, "loss": 0.8335, "step": 4617 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.727908240453218e-05, "loss": 0.949, "step": 4618 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7277714657607917e-05, "loss": 0.6781, "step": 4619 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7276346621163692e-05, "loss": 0.6973, "step": 4620 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7274978295253933e-05, "loss": 0.738, "step": 4621 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7273609679933077e-05, "loss": 0.8398, "step": 4622 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7272240775255568e-05, "loss": 0.9546, "step": 4623 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.727087158127586e-05, "loss": 0.7075, "step": 4624 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.726950209804843e-05, "loss": 0.8806, "step": 4625 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7268132325627748e-05, "loss": 0.973, "step": 4626 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7266762264068313e-05, "loss": 1.0578, "step": 4627 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7265391913424624e-05, "loss": 0.8137, "step": 4628 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7264021273751203e-05, "loss": 0.7966, "step": 4629 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.726265034510257e-05, "loss": 0.7967, "step": 4630 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7261279127533264e-05, "loss": 0.8017, "step": 4631 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7259907621097835e-05, "loss": 0.7584, "step": 4632 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7258535825850845e-05, "loss": 0.8882, "step": 4633 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7257163741846865e-05, "loss": 0.8011, "step": 4634 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7255791369140476e-05, "loss": 0.8771, "step": 4635 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7254418707786282e-05, "loss": 0.9193, "step": 4636 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.725304575783888e-05, "loss": 0.7831, "step": 4637 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7251672519352895e-05, "loss": 0.8796, "step": 4638 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.725029899238295e-05, "loss": 0.8838, "step": 4639 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7248925176983697e-05, "loss": 0.8067, "step": 4640 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.724755107320978e-05, "loss": 0.758, "step": 4641 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7246176681115865e-05, "loss": 0.7431, "step": 4642 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7244802000756628e-05, "loss": 0.8627, "step": 4643 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7243427032186756e-05, "loss": 0.7298, "step": 4644 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7242051775460948e-05, "loss": 0.7578, "step": 4645 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7240676230633916e-05, "loss": 0.7664, "step": 4646 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7239300397760373e-05, "loss": 0.8139, "step": 4647 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7237924276895066e-05, "loss": 0.9197, "step": 4648 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.723654786809273e-05, "loss": 0.8411, "step": 4649 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7235171171408123e-05, "loss": 0.8792, "step": 4650 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7233794186896012e-05, "loss": 0.7966, "step": 4651 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7232416914611175e-05, "loss": 0.7007, "step": 4652 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7231039354608407e-05, "loss": 0.7617, "step": 4653 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7229661506942502e-05, "loss": 0.8367, "step": 4654 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7228283371668278e-05, "loss": 0.7811, "step": 4655 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7226904948840558e-05, "loss": 0.8334, "step": 4656 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7225526238514182e-05, "loss": 0.8331, "step": 4657 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.722414724074399e-05, "loss": 0.9681, "step": 4658 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7222767955584848e-05, "loss": 0.8174, "step": 4659 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7221388383091624e-05, "loss": 0.7968, "step": 4660 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7220008523319198e-05, "loss": 0.8342, "step": 4661 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7218628376322462e-05, "loss": 0.7683, "step": 4662 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7217247942156326e-05, "loss": 0.9713, "step": 4663 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.72158672208757e-05, "loss": 0.7311, "step": 4664 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.721448621253552e-05, "loss": 0.9953, "step": 4665 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7213104917190718e-05, "loss": 0.8141, "step": 4666 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.721172333489624e-05, "loss": 0.8013, "step": 4667 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.721034146570706e-05, "loss": 0.7857, "step": 4668 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.720895930967814e-05, "loss": 0.8176, "step": 4669 }, { "epoch": 0.84, "grad_norm": 0.0, "learning_rate": 1.7207576866864474e-05, "loss": 0.8861, "step": 4670 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7206194137321047e-05, "loss": 0.7667, "step": 4671 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7204811121102877e-05, "loss": 0.8846, "step": 4672 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7203427818264973e-05, "loss": 0.8359, "step": 4673 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7202044228862376e-05, "loss": 0.7098, "step": 4674 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7200660352950118e-05, "loss": 0.921, "step": 4675 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7199276190583258e-05, "loss": 1.0, "step": 4676 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7197891741816858e-05, "loss": 0.9023, "step": 4677 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.719650700670599e-05, "loss": 0.758, "step": 4678 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7195121985305748e-05, "loss": 0.7975, "step": 4679 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7193736677671226e-05, "loss": 0.803, "step": 4680 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7192351083857537e-05, "loss": 0.7158, "step": 4681 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7190965203919804e-05, "loss": 0.9278, "step": 4682 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.718957903791315e-05, "loss": 0.7161, "step": 4683 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7188192585892728e-05, "loss": 0.736, "step": 4684 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7186805847913692e-05, "loss": 0.8124, "step": 4685 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7185418824031204e-05, "loss": 0.763, "step": 4686 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.718403151430045e-05, "loss": 0.7341, "step": 4687 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7182643918776616e-05, "loss": 0.7204, "step": 4688 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.71812560375149e-05, "loss": 0.8305, "step": 4689 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7179867870570517e-05, "loss": 0.8008, "step": 4690 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7178479417998692e-05, "loss": 0.8498, "step": 4691 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7177090679854655e-05, "loss": 0.8517, "step": 4692 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7175701656193658e-05, "loss": 0.6979, "step": 4693 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7174312347070956e-05, "loss": 0.7937, "step": 4694 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7172922752541817e-05, "loss": 0.9706, "step": 4695 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7171532872661526e-05, "loss": 0.6873, "step": 4696 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.717014270748537e-05, "loss": 0.8344, "step": 4697 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7168752257068654e-05, "loss": 0.7912, "step": 4698 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7167361521466694e-05, "loss": 0.8433, "step": 4699 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7165970500734812e-05, "loss": 0.8795, "step": 4700 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7164579194928347e-05, "loss": 0.9623, "step": 4701 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7163187604102647e-05, "loss": 0.8478, "step": 4702 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.716179572831308e-05, "loss": 0.8267, "step": 4703 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7160403567615003e-05, "loss": 0.7444, "step": 4704 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.715901112206381e-05, "loss": 0.7707, "step": 4705 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.715761839171489e-05, "loss": 0.7385, "step": 4706 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7156225376623644e-05, "loss": 0.8838, "step": 4707 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7154832076845497e-05, "loss": 0.7383, "step": 4708 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7153438492435873e-05, "loss": 0.8847, "step": 4709 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.715204462345021e-05, "loss": 0.8194, "step": 4710 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.715065046994396e-05, "loss": 0.8682, "step": 4711 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.714925603197259e-05, "loss": 0.7483, "step": 4712 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.714786130959156e-05, "loss": 0.8881, "step": 4713 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7146466302856366e-05, "loss": 0.7657, "step": 4714 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.71450710118225e-05, "loss": 0.7518, "step": 4715 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7143675436545465e-05, "loss": 0.8233, "step": 4716 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.714227957708079e-05, "loss": 0.8796, "step": 4717 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7140883433483992e-05, "loss": 0.8584, "step": 4718 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.713948700581062e-05, "loss": 0.8014, "step": 4719 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7138090294116226e-05, "loss": 0.8847, "step": 4720 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.713669329845637e-05, "loss": 0.763, "step": 4721 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.713529601888663e-05, "loss": 0.69, "step": 4722 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.713389845546259e-05, "loss": 0.771, "step": 4723 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7132500608239847e-05, "loss": 0.8599, "step": 4724 }, { "epoch": 0.85, "grad_norm": 0.0, "learning_rate": 1.7131102477274014e-05, "loss": 0.8285, "step": 4725 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7129704062620706e-05, "loss": 0.7523, "step": 4726 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7128305364335556e-05, "loss": 0.916, "step": 4727 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7126906382474207e-05, "loss": 0.9574, "step": 4728 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7125507117092316e-05, "loss": 0.9746, "step": 4729 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7124107568245544e-05, "loss": 0.8038, "step": 4730 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7122707735989568e-05, "loss": 0.8704, "step": 4731 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7121307620380077e-05, "loss": 0.8826, "step": 4732 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.711990722147277e-05, "loss": 0.8054, "step": 4733 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7118506539323355e-05, "loss": 0.8441, "step": 4734 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7117105573987556e-05, "loss": 0.9811, "step": 4735 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7115704325521108e-05, "loss": 0.9414, "step": 4736 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7114302793979745e-05, "loss": 0.903, "step": 4737 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7112900979419237e-05, "loss": 0.8911, "step": 4738 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7111498881895338e-05, "loss": 0.8267, "step": 4739 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7110096501463833e-05, "loss": 0.9273, "step": 4740 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7108693838180506e-05, "loss": 0.9046, "step": 4741 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7107290892101162e-05, "loss": 0.6695, "step": 4742 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7105887663281614e-05, "loss": 0.8555, "step": 4743 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7104484151777677e-05, "loss": 0.9062, "step": 4744 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7103080357645192e-05, "loss": 1.0009, "step": 4745 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7101676280940002e-05, "loss": 0.8071, "step": 4746 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7100271921717963e-05, "loss": 0.6676, "step": 4747 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7098867280034947e-05, "loss": 0.8442, "step": 4748 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7097462355946825e-05, "loss": 0.8528, "step": 4749 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7096057149509494e-05, "loss": 0.7995, "step": 4750 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7094651660778852e-05, "loss": 0.7997, "step": 4751 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7093245889810816e-05, "loss": 0.8454, "step": 4752 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7091839836661305e-05, "loss": 0.7926, "step": 4753 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7090433501386254e-05, "loss": 0.9238, "step": 4754 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7089026884041612e-05, "loss": 0.775, "step": 4755 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.708761998468334e-05, "loss": 0.8602, "step": 4756 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.70862128033674e-05, "loss": 0.8125, "step": 4757 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.708480534014978e-05, "loss": 0.9044, "step": 4758 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7083397595086465e-05, "loss": 0.9759, "step": 4759 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7081989568233455e-05, "loss": 0.9053, "step": 4760 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.708058125964677e-05, "loss": 0.8043, "step": 4761 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7079172669382434e-05, "loss": 0.8901, "step": 4762 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.707776379749648e-05, "loss": 0.7453, "step": 4763 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.707635464404496e-05, "loss": 0.822, "step": 4764 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.707494520908393e-05, "loss": 0.9119, "step": 4765 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7073535492669457e-05, "loss": 0.973, "step": 4766 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7072125494857624e-05, "loss": 0.7628, "step": 4767 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7070715215704526e-05, "loss": 0.8137, "step": 4768 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.706930465526626e-05, "loss": 0.8472, "step": 4769 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.706789381359895e-05, "loss": 0.6264, "step": 4770 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7066482690758713e-05, "loss": 0.8668, "step": 4771 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.706507128680169e-05, "loss": 0.7604, "step": 4772 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7063659601784026e-05, "loss": 0.8313, "step": 4773 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7062247635761886e-05, "loss": 0.8252, "step": 4774 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7060835388791433e-05, "loss": 0.7879, "step": 4775 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7059422860928853e-05, "loss": 0.7301, "step": 4776 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.705801005223034e-05, "loss": 0.7908, "step": 4777 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7056596962752096e-05, "loss": 0.7899, "step": 4778 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7055183592550334e-05, "loss": 0.8151, "step": 4779 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7053769941681287e-05, "loss": 0.7124, "step": 4780 }, { "epoch": 0.86, "grad_norm": 0.0, "learning_rate": 1.7052356010201183e-05, "loss": 0.7956, "step": 4781 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7050941798166277e-05, "loss": 0.819, "step": 4782 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.704952730563283e-05, "loss": 0.8133, "step": 4783 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7048112532657106e-05, "loss": 0.7095, "step": 4784 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7046697479295395e-05, "loss": 0.8152, "step": 4785 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7045282145603983e-05, "loss": 0.7991, "step": 4786 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.704386653163918e-05, "loss": 0.7683, "step": 4787 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.70424506374573e-05, "loss": 0.9069, "step": 4788 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.704103446311467e-05, "loss": 0.8375, "step": 4789 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7039618008667625e-05, "loss": 0.849, "step": 4790 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7038201274172515e-05, "loss": 0.7172, "step": 4791 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7036784259685704e-05, "loss": 0.8906, "step": 4792 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.703536696526356e-05, "loss": 0.7608, "step": 4793 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7033949390962467e-05, "loss": 0.8095, "step": 4794 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7032531536838814e-05, "loss": 0.8308, "step": 4795 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7031113402949012e-05, "loss": 0.9875, "step": 4796 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7029694989349472e-05, "loss": 0.836, "step": 4797 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7028276296096625e-05, "loss": 0.8781, "step": 4798 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7026857323246902e-05, "loss": 0.7495, "step": 4799 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7025438070856764e-05, "loss": 0.8735, "step": 4800 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.702401853898266e-05, "loss": 0.749, "step": 4801 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7022598727681065e-05, "loss": 0.727, "step": 4802 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7021178637008464e-05, "loss": 0.7679, "step": 4803 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7019758267021347e-05, "loss": 0.9597, "step": 4804 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7018337617776223e-05, "loss": 0.8919, "step": 4805 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7016916689329605e-05, "loss": 0.7823, "step": 4806 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.701549548173802e-05, "loss": 0.8353, "step": 4807 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7014073995058006e-05, "loss": 0.884, "step": 4808 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7012652229346116e-05, "loss": 0.7085, "step": 4809 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.70112301846589e-05, "loss": 0.8245, "step": 4810 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7009807861052942e-05, "loss": 0.7702, "step": 4811 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7008385258584817e-05, "loss": 0.7425, "step": 4812 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7006962377311122e-05, "loss": 0.8112, "step": 4813 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.700553921728846e-05, "loss": 0.806, "step": 4814 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7004115778573447e-05, "loss": 0.8937, "step": 4815 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.700269206122271e-05, "loss": 0.7057, "step": 4816 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.7001268065292883e-05, "loss": 0.8231, "step": 4817 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.699984379084062e-05, "loss": 0.7595, "step": 4818 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.699841923792258e-05, "loss": 0.7354, "step": 4819 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6996994406595432e-05, "loss": 0.7767, "step": 4820 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6995569296915865e-05, "loss": 0.8529, "step": 4821 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.699414390894056e-05, "loss": 0.7405, "step": 4822 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6992718242726236e-05, "loss": 0.655, "step": 4823 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6991292298329594e-05, "loss": 0.9036, "step": 4824 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6989866075807374e-05, "loss": 0.758, "step": 4825 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.69884395752163e-05, "loss": 0.8781, "step": 4826 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6987012796613136e-05, "loss": 0.9076, "step": 4827 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6985585740054632e-05, "loss": 0.7059, "step": 4828 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6984158405597557e-05, "loss": 0.7556, "step": 4829 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6982730793298696e-05, "loss": 0.9622, "step": 4830 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6981302903214845e-05, "loss": 0.8561, "step": 4831 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6979874735402802e-05, "loss": 0.8527, "step": 4832 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6978446289919387e-05, "loss": 0.9197, "step": 4833 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6977017566821424e-05, "loss": 0.824, "step": 4834 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.6975588566165747e-05, "loss": 0.8765, "step": 4835 }, { "epoch": 0.87, "grad_norm": 0.0, "learning_rate": 1.697415928800921e-05, "loss": 0.8289, "step": 4836 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6972729732408665e-05, "loss": 0.8095, "step": 4837 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.697129989942099e-05, "loss": 0.931, "step": 4838 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6969869789103063e-05, "loss": 0.8287, "step": 4839 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6968439401511772e-05, "loss": 0.921, "step": 4840 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.696700873670403e-05, "loss": 0.7368, "step": 4841 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.696557779473674e-05, "loss": 0.8588, "step": 4842 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6964146575666835e-05, "loss": 0.8137, "step": 4843 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6962715079551248e-05, "loss": 0.7788, "step": 4844 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.696128330644693e-05, "loss": 0.8604, "step": 4845 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.695985125641083e-05, "loss": 0.9293, "step": 4846 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6958418929499932e-05, "loss": 0.8807, "step": 4847 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6956986325771205e-05, "loss": 0.926, "step": 4848 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.695555344528164e-05, "loss": 0.7554, "step": 4849 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.695412028808825e-05, "loss": 0.7347, "step": 4850 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.695268685424804e-05, "loss": 0.7588, "step": 4851 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6951253143818035e-05, "loss": 0.8576, "step": 4852 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6949819156855272e-05, "loss": 0.8805, "step": 4853 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.69483848934168e-05, "loss": 0.8805, "step": 4854 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.694695035355967e-05, "loss": 0.8598, "step": 4855 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6945515537340958e-05, "loss": 0.8145, "step": 4856 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6944080444817737e-05, "loss": 0.7975, "step": 4857 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6942645076047098e-05, "loss": 0.8239, "step": 4858 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6941209431086147e-05, "loss": 0.7742, "step": 4859 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6939773509991995e-05, "loss": 0.8477, "step": 4860 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6938337312821764e-05, "loss": 0.8086, "step": 4861 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6936900839632583e-05, "loss": 0.9367, "step": 4862 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.693546409048161e-05, "loss": 0.8282, "step": 4863 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6934027065425992e-05, "loss": 1.0786, "step": 4864 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.69325897645229e-05, "loss": 0.8947, "step": 4865 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.693115218782951e-05, "loss": 0.757, "step": 4866 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.692971433540301e-05, "loss": 0.7509, "step": 4867 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6928276207300603e-05, "loss": 0.9723, "step": 4868 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6926837803579503e-05, "loss": 0.7203, "step": 4869 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6925399124296928e-05, "loss": 0.8868, "step": 4870 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.692396016951011e-05, "loss": 0.8334, "step": 4871 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6922520939276296e-05, "loss": 0.7778, "step": 4872 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.692108143365274e-05, "loss": 0.798, "step": 4873 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6919641652696706e-05, "loss": 0.8534, "step": 4874 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6918201596465476e-05, "loss": 0.8091, "step": 4875 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6916761265016335e-05, "loss": 0.8602, "step": 4876 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.691532065840658e-05, "loss": 0.8701, "step": 4877 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6913879776693522e-05, "loss": 0.7393, "step": 4878 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6912438619934485e-05, "loss": 0.8415, "step": 4879 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6910997188186797e-05, "loss": 0.8279, "step": 4880 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6909555481507803e-05, "loss": 0.8549, "step": 4881 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.690811349995485e-05, "loss": 0.7879, "step": 4882 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6906671243585313e-05, "loss": 0.8242, "step": 4883 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.690522871245656e-05, "loss": 1.0262, "step": 4884 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.690378590662598e-05, "loss": 0.82, "step": 4885 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.690234282615097e-05, "loss": 0.895, "step": 4886 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6900899471088937e-05, "loss": 0.8122, "step": 4887 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6899455841497306e-05, "loss": 0.7457, "step": 4888 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6898011937433497e-05, "loss": 0.8498, "step": 4889 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.6896567758954958e-05, "loss": 0.7953, "step": 4890 }, { "epoch": 0.88, "grad_norm": 0.0, "learning_rate": 1.689512330611914e-05, "loss": 0.8252, "step": 4891 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.68936785789835e-05, "loss": 1.0002, "step": 4892 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.689223357760552e-05, "loss": 0.8247, "step": 4893 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.689078830204268e-05, "loss": 0.7737, "step": 4894 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.688934275235248e-05, "loss": 0.9305, "step": 4895 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6887896928592422e-05, "loss": 0.9534, "step": 4896 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6886450830820023e-05, "loss": 0.8653, "step": 4897 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6885004459092816e-05, "loss": 0.8934, "step": 4898 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6883557813468333e-05, "loss": 0.8097, "step": 4899 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.688211089400413e-05, "loss": 0.7755, "step": 4900 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6880663700757765e-05, "loss": 0.7489, "step": 4901 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.687921623378681e-05, "loss": 0.8358, "step": 4902 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6877768493148852e-05, "loss": 0.8112, "step": 4903 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.687632047890148e-05, "loss": 0.7847, "step": 4904 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6874872191102295e-05, "loss": 0.784, "step": 4905 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6873423629808922e-05, "loss": 0.6939, "step": 4906 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6871974795078978e-05, "loss": 0.7745, "step": 4907 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6870525686970106e-05, "loss": 0.9016, "step": 4908 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6869076305539955e-05, "loss": 0.7294, "step": 4909 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.686762665084618e-05, "loss": 0.664, "step": 4910 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.686617672294645e-05, "loss": 0.7556, "step": 4911 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.686472652189845e-05, "loss": 0.9898, "step": 4912 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6863276047759867e-05, "loss": 0.7886, "step": 4913 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6861825300588405e-05, "loss": 0.8346, "step": 4914 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6860374280441778e-05, "loss": 0.913, "step": 4915 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.685892298737771e-05, "loss": 0.7875, "step": 4916 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6857471421453938e-05, "loss": 0.8296, "step": 4917 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6856019582728206e-05, "loss": 0.8471, "step": 4918 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6854567471258265e-05, "loss": 0.7867, "step": 4919 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.685311508710189e-05, "loss": 0.7926, "step": 4920 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6851662430316857e-05, "loss": 0.8142, "step": 4921 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6850209500960955e-05, "loss": 0.8635, "step": 4922 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6848756299091982e-05, "loss": 0.7116, "step": 4923 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.684730282476775e-05, "loss": 1.0283, "step": 4924 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6845849078046085e-05, "loss": 0.822, "step": 4925 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6844395058984817e-05, "loss": 0.7539, "step": 4926 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6842940767641783e-05, "loss": 0.6966, "step": 4927 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6841486204074848e-05, "loss": 0.8692, "step": 4928 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.684003136834187e-05, "loss": 0.7406, "step": 4929 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6838576260500727e-05, "loss": 0.8526, "step": 4930 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6837120880609303e-05, "loss": 0.7844, "step": 4931 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.68356652287255e-05, "loss": 0.7034, "step": 4932 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6834209304907223e-05, "loss": 0.8859, "step": 4933 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6832753109212394e-05, "loss": 0.7858, "step": 4934 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.683129664169894e-05, "loss": 0.6563, "step": 4935 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6829839902424802e-05, "loss": 0.8397, "step": 4936 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6828382891447933e-05, "loss": 0.8288, "step": 4937 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6826925608826295e-05, "loss": 0.875, "step": 4938 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.682546805461786e-05, "loss": 0.8774, "step": 4939 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6824010228880612e-05, "loss": 0.7106, "step": 4940 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.682255213167255e-05, "loss": 0.8774, "step": 4941 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.682109376305168e-05, "loss": 0.793, "step": 4942 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6819635123076008e-05, "loss": 0.8173, "step": 4943 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6818176211803572e-05, "loss": 0.9534, "step": 4944 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6816717029292403e-05, "loss": 0.7811, "step": 4945 }, { "epoch": 0.89, "grad_norm": 0.0, "learning_rate": 1.6815257575600555e-05, "loss": 0.8466, "step": 4946 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6813797850786086e-05, "loss": 0.9735, "step": 4947 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6812337854907067e-05, "loss": 0.8154, "step": 4948 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6810877588021574e-05, "loss": 0.7561, "step": 4949 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6809417050187704e-05, "loss": 0.9465, "step": 4950 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.680795624146356e-05, "loss": 1.0132, "step": 4951 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6806495161907253e-05, "loss": 0.8023, "step": 4952 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.680503381157691e-05, "loss": 0.6916, "step": 4953 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6803572190530662e-05, "loss": 0.7768, "step": 4954 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6802110298826658e-05, "loss": 0.8726, "step": 4955 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6800648136523054e-05, "loss": 0.7809, "step": 4956 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6799185703678017e-05, "loss": 0.834, "step": 4957 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6797723000349723e-05, "loss": 0.9355, "step": 4958 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6796260026596368e-05, "loss": 0.7657, "step": 4959 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6794796782476144e-05, "loss": 0.7993, "step": 4960 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.679333326804726e-05, "loss": 0.7505, "step": 4961 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6791869483367943e-05, "loss": 1.0009, "step": 4962 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6790405428496426e-05, "loss": 0.81, "step": 4963 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6788941103490946e-05, "loss": 0.9008, "step": 4964 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.678747650840976e-05, "loss": 0.7879, "step": 4965 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.678601164331113e-05, "loss": 0.8771, "step": 4966 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6784546508253332e-05, "loss": 0.9115, "step": 4967 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.678308110329465e-05, "loss": 0.6431, "step": 4968 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.678161542849338e-05, "loss": 0.8466, "step": 4969 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6780149483907835e-05, "loss": 0.7738, "step": 4970 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.677868326959633e-05, "loss": 0.7208, "step": 4971 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6777216785617187e-05, "loss": 0.838, "step": 4972 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.677575003202875e-05, "loss": 0.8723, "step": 4973 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6774283008889373e-05, "loss": 0.8836, "step": 4974 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6772815716257414e-05, "loss": 0.912, "step": 4975 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.677134815419124e-05, "loss": 0.7446, "step": 4976 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6769880322749235e-05, "loss": 0.733, "step": 4977 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6768412221989798e-05, "loss": 0.8081, "step": 4978 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6766943851971323e-05, "loss": 0.8853, "step": 4979 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6765475212752232e-05, "loss": 1.1112, "step": 4980 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6764006304390946e-05, "loss": 0.8418, "step": 4981 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6762537126945904e-05, "loss": 0.8012, "step": 4982 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.676106768047555e-05, "loss": 0.8195, "step": 4983 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6759597965038338e-05, "loss": 0.9027, "step": 4984 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.675812798069274e-05, "loss": 0.7699, "step": 4985 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6756657727497236e-05, "loss": 0.8469, "step": 4986 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.675518720551031e-05, "loss": 0.7386, "step": 4987 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6753716414790468e-05, "loss": 0.7238, "step": 4988 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6752245355396212e-05, "loss": 0.8358, "step": 4989 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6750774027386074e-05, "loss": 0.8174, "step": 4990 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.674930243081858e-05, "loss": 0.7818, "step": 4991 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.674783056575227e-05, "loss": 0.8869, "step": 4992 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.67463584322457e-05, "loss": 0.8597, "step": 4993 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.674488603035744e-05, "loss": 0.848, "step": 4994 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6743413360146058e-05, "loss": 0.8488, "step": 4995 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6741940421670134e-05, "loss": 0.847, "step": 4996 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6740467214988275e-05, "loss": 0.9887, "step": 4997 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.673899374015908e-05, "loss": 0.6588, "step": 4998 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6737519997241175e-05, "loss": 0.8409, "step": 4999 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.673604598629318e-05, "loss": 0.8337, "step": 5000 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.6734571707373736e-05, "loss": 0.8232, "step": 5001 }, { "epoch": 0.9, "grad_norm": 0.0, "learning_rate": 1.673309716054149e-05, "loss": 0.8232, "step": 5002 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6731622345855107e-05, "loss": 0.942, "step": 5003 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6730147263373256e-05, "loss": 0.666, "step": 5004 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6728671913154615e-05, "loss": 0.7683, "step": 5005 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.672719629525788e-05, "loss": 0.7677, "step": 5006 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6725720409741752e-05, "loss": 0.7336, "step": 5007 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6724244256664944e-05, "loss": 0.8519, "step": 5008 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6722767836086183e-05, "loss": 0.6864, "step": 5009 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6721291148064196e-05, "loss": 0.792, "step": 5010 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6719814192657736e-05, "loss": 0.7824, "step": 5011 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6718336969925557e-05, "loss": 0.8434, "step": 5012 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.671685947992642e-05, "loss": 0.932, "step": 5013 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6715381722719106e-05, "loss": 0.7342, "step": 5014 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6713903698362406e-05, "loss": 0.8889, "step": 5015 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6712425406915117e-05, "loss": 0.8071, "step": 5016 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6710946848436042e-05, "loss": 0.7972, "step": 5017 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6709468022984004e-05, "loss": 0.8563, "step": 5018 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6707988930617837e-05, "loss": 0.8665, "step": 5019 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.670650957139638e-05, "loss": 0.8609, "step": 5020 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6705029945378483e-05, "loss": 0.781, "step": 5021 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6703550052623006e-05, "loss": 0.7101, "step": 5022 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6702069893188825e-05, "loss": 0.8723, "step": 5023 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6700589467134825e-05, "loss": 0.873, "step": 5024 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6699108774519896e-05, "loss": 0.7797, "step": 5025 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6697627815402943e-05, "loss": 0.9279, "step": 5026 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.669614658984288e-05, "loss": 0.8648, "step": 5027 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6694665097898637e-05, "loss": 0.6966, "step": 5028 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6693183339629148e-05, "loss": 0.8033, "step": 5029 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6691701315093357e-05, "loss": 0.7439, "step": 5030 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6690219024350226e-05, "loss": 0.8846, "step": 5031 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.668873646745872e-05, "loss": 0.9981, "step": 5032 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6687253644477822e-05, "loss": 0.7342, "step": 5033 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6685770555466514e-05, "loss": 0.6887, "step": 5034 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6684287200483805e-05, "loss": 0.9087, "step": 5035 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6682803579588695e-05, "loss": 0.7517, "step": 5036 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6681319692840216e-05, "loss": 0.7837, "step": 5037 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6679835540297385e-05, "loss": 0.8507, "step": 5038 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6678351122019262e-05, "loss": 0.8361, "step": 5039 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6676866438064888e-05, "loss": 0.7201, "step": 5040 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6675381488493327e-05, "loss": 0.836, "step": 5041 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6673896273363657e-05, "loss": 0.7298, "step": 5042 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.667241079273496e-05, "loss": 0.6747, "step": 5043 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.667092504666633e-05, "loss": 0.7629, "step": 5044 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6669439035216875e-05, "loss": 0.7759, "step": 5045 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.666795275844571e-05, "loss": 0.8915, "step": 5046 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6666466216411962e-05, "loss": 0.8192, "step": 5047 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6664979409174766e-05, "loss": 0.771, "step": 5048 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.666349233679327e-05, "loss": 0.8512, "step": 5049 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6662004999326634e-05, "loss": 0.7442, "step": 5050 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6660517396834025e-05, "loss": 0.7409, "step": 5051 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.665902952937463e-05, "loss": 0.9487, "step": 5052 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.665754139700763e-05, "loss": 0.7304, "step": 5053 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.6656052999792226e-05, "loss": 0.8048, "step": 5054 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.665456433778763e-05, "loss": 0.7621, "step": 5055 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.665307541105307e-05, "loss": 0.8592, "step": 5056 }, { "epoch": 0.91, "grad_norm": 0.0, "learning_rate": 1.665158621964777e-05, "loss": 0.893, "step": 5057 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6650096763630977e-05, "loss": 0.713, "step": 5058 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.664860704306194e-05, "loss": 0.6831, "step": 5059 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6647117057999926e-05, "loss": 0.8567, "step": 5060 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6645626808504207e-05, "loss": 0.9222, "step": 5061 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6644136294634075e-05, "loss": 0.931, "step": 5062 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.664264551644881e-05, "loss": 0.7234, "step": 5063 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.664115447400773e-05, "loss": 0.965, "step": 5064 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6639663167370154e-05, "loss": 0.8268, "step": 5065 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6638171596595398e-05, "loss": 0.8337, "step": 5066 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6636679761742803e-05, "loss": 0.7441, "step": 5067 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6635187662871722e-05, "loss": 0.8742, "step": 5068 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6633695300041505e-05, "loss": 0.8388, "step": 5069 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.663220267331153e-05, "loss": 0.8426, "step": 5070 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6630709782741162e-05, "loss": 0.8305, "step": 5071 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6629216628389807e-05, "loss": 0.8625, "step": 5072 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6627723210316857e-05, "loss": 0.8878, "step": 5073 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.662622952858172e-05, "loss": 0.8387, "step": 5074 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6624735583243827e-05, "loss": 0.7496, "step": 5075 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.66232413743626e-05, "loss": 0.809, "step": 5076 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6621746901997487e-05, "loss": 0.8346, "step": 5077 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6620252166207935e-05, "loss": 0.8853, "step": 5078 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6618757167053413e-05, "loss": 0.8843, "step": 5079 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.661726190459339e-05, "loss": 0.801, "step": 5080 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6615766378887356e-05, "loss": 0.9482, "step": 5081 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.66142705899948e-05, "loss": 0.9454, "step": 5082 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6612774537975233e-05, "loss": 0.8977, "step": 5083 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6611278222888163e-05, "loss": 0.9079, "step": 5084 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6609781644793117e-05, "loss": 0.8115, "step": 5085 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6608284803749636e-05, "loss": 0.7329, "step": 5086 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.660678769981727e-05, "loss": 0.8484, "step": 5087 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6605290333055565e-05, "loss": 0.8021, "step": 5088 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.66037927035241e-05, "loss": 0.7707, "step": 5089 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6602294811282443e-05, "loss": 0.7094, "step": 5090 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.660079665639019e-05, "loss": 0.8733, "step": 5091 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6599298238906937e-05, "loss": 0.8926, "step": 5092 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6597799558892294e-05, "loss": 0.7626, "step": 5093 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.659630061640589e-05, "loss": 0.8315, "step": 5094 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6594801411507337e-05, "loss": 0.7825, "step": 5095 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6593301944256292e-05, "loss": 0.8497, "step": 5096 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.65918022147124e-05, "loss": 0.8983, "step": 5097 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6590302222935323e-05, "loss": 0.814, "step": 5098 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6588801968984735e-05, "loss": 0.8197, "step": 5099 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6587301452920317e-05, "loss": 0.7748, "step": 5100 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.658580067480176e-05, "loss": 0.8696, "step": 5101 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.658429963468877e-05, "loss": 0.9708, "step": 5102 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6582798332641064e-05, "loss": 0.7405, "step": 5103 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6581296768718364e-05, "loss": 0.9541, "step": 5104 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.65797949429804e-05, "loss": 0.8834, "step": 5105 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6578292855486927e-05, "loss": 0.8469, "step": 5106 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6576790506297694e-05, "loss": 0.8502, "step": 5107 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6575287895472462e-05, "loss": 0.8159, "step": 5108 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.657378502307102e-05, "loss": 0.8877, "step": 5109 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6572281889153143e-05, "loss": 0.8171, "step": 5110 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.657077849377864e-05, "loss": 0.7614, "step": 5111 }, { "epoch": 0.92, "grad_norm": 0.0, "learning_rate": 1.6569274837007302e-05, "loss": 0.7772, "step": 5112 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6567770918898965e-05, "loss": 0.7712, "step": 5113 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.656626673951345e-05, "loss": 0.7352, "step": 5114 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6564762298910592e-05, "loss": 0.9018, "step": 5115 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6563257597150246e-05, "loss": 0.8621, "step": 5116 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6561752634292267e-05, "loss": 0.839, "step": 5117 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.656024741039653e-05, "loss": 0.7423, "step": 5118 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.655874192552291e-05, "loss": 0.7608, "step": 5119 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.65572361797313e-05, "loss": 0.8899, "step": 5120 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6555730173081608e-05, "loss": 0.7058, "step": 5121 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6554223905633734e-05, "loss": 0.6814, "step": 5122 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6552717377447603e-05, "loss": 0.7623, "step": 5123 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6551210588583153e-05, "loss": 0.8718, "step": 5124 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6549703539100323e-05, "loss": 0.8009, "step": 5125 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6548196229059065e-05, "loss": 0.8683, "step": 5126 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6546688658519343e-05, "loss": 0.8027, "step": 5127 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.654518082754113e-05, "loss": 0.8046, "step": 5128 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.654367273618441e-05, "loss": 0.9061, "step": 5129 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6542164384509184e-05, "loss": 0.8898, "step": 5130 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6540655772575447e-05, "loss": 0.9048, "step": 5131 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.653914690044322e-05, "loss": 0.7497, "step": 5132 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6537637768172527e-05, "loss": 0.9167, "step": 5133 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6536128375823405e-05, "loss": 0.8291, "step": 5134 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6534618723455897e-05, "loss": 0.8189, "step": 5135 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6533108811130064e-05, "loss": 0.8844, "step": 5136 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6531598638905972e-05, "loss": 0.7024, "step": 5137 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6530088206843694e-05, "loss": 0.8732, "step": 5138 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.652857751500332e-05, "loss": 0.7801, "step": 5139 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.652706656344495e-05, "loss": 0.7958, "step": 5140 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.652555535222869e-05, "loss": 1.0303, "step": 5141 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.652404388141466e-05, "loss": 0.8187, "step": 5142 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6522532151062987e-05, "loss": 0.8038, "step": 5143 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.652102016123381e-05, "loss": 0.7624, "step": 5144 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.651950791198728e-05, "loss": 0.8765, "step": 5145 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.651799540338356e-05, "loss": 0.922, "step": 5146 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6516482635482817e-05, "loss": 0.9168, "step": 5147 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6514969608345228e-05, "loss": 0.7882, "step": 5148 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.651345632203099e-05, "loss": 0.8231, "step": 5149 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.65119427766003e-05, "loss": 0.7569, "step": 5150 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.651042897211337e-05, "loss": 0.7731, "step": 5151 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6508914908630425e-05, "loss": 0.8381, "step": 5152 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6507400586211693e-05, "loss": 0.7303, "step": 5153 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6505886004917418e-05, "loss": 0.8677, "step": 5154 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6504371164807852e-05, "loss": 0.9087, "step": 5155 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6502856065943257e-05, "loss": 0.8712, "step": 5156 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6501340708383913e-05, "loss": 0.826, "step": 5157 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6499825092190093e-05, "loss": 0.8406, "step": 5158 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6498309217422098e-05, "loss": 0.9034, "step": 5159 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6496793084140226e-05, "loss": 0.7894, "step": 5160 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.64952766924048e-05, "loss": 0.7055, "step": 5161 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.649376004227614e-05, "loss": 0.8016, "step": 5162 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6492243133814582e-05, "loss": 0.6419, "step": 5163 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.649072596708047e-05, "loss": 0.7014, "step": 5164 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6489208542134158e-05, "loss": 0.8317, "step": 5165 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.6487690859036015e-05, "loss": 0.7755, "step": 5166 }, { "epoch": 0.93, "grad_norm": 0.0, "learning_rate": 1.648617291784641e-05, "loss": 0.8506, "step": 5167 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6484654718625743e-05, "loss": 0.8476, "step": 5168 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.64831362614344e-05, "loss": 0.7715, "step": 5169 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.648161754633279e-05, "loss": 0.9227, "step": 5170 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.648009857338133e-05, "loss": 0.881, "step": 5171 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6478579342640447e-05, "loss": 0.8466, "step": 5172 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6477059854170582e-05, "loss": 0.7922, "step": 5173 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6475540108032176e-05, "loss": 0.9035, "step": 5174 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6474020104285693e-05, "loss": 0.9014, "step": 5175 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.64724998429916e-05, "loss": 0.7991, "step": 5176 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6470979324210376e-05, "loss": 0.8367, "step": 5177 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6469458548002508e-05, "loss": 0.818, "step": 5178 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6467937514428496e-05, "loss": 0.7565, "step": 5179 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.646641622354885e-05, "loss": 0.8724, "step": 5180 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.646489467542409e-05, "loss": 0.8267, "step": 5181 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6463372870114743e-05, "loss": 0.762, "step": 5182 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6461850807681354e-05, "loss": 0.7536, "step": 5183 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6460328488184467e-05, "loss": 0.8148, "step": 5184 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6458805911684644e-05, "loss": 0.8858, "step": 5185 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6457283078242462e-05, "loss": 0.72, "step": 5186 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6455759987918494e-05, "loss": 0.7745, "step": 5187 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6454236640773338e-05, "loss": 0.812, "step": 5188 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6452713036867588e-05, "loss": 0.9701, "step": 5189 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.645118917626186e-05, "loss": 0.8023, "step": 5190 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6449665059016774e-05, "loss": 0.8258, "step": 5191 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6448140685192963e-05, "loss": 0.8021, "step": 5192 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6446616054851072e-05, "loss": 0.9364, "step": 5193 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.644509116805175e-05, "loss": 0.739, "step": 5194 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6443566024855663e-05, "loss": 0.8553, "step": 5195 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6442040625323475e-05, "loss": 0.8422, "step": 5196 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6440514969515878e-05, "loss": 0.9558, "step": 5197 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.643898905749356e-05, "loss": 0.8177, "step": 5198 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.643746288931723e-05, "loss": 0.7552, "step": 5199 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6435936465047598e-05, "loss": 0.8896, "step": 5200 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6434409784745384e-05, "loss": 0.9645, "step": 5201 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6432882848471332e-05, "loss": 0.7333, "step": 5202 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.643135565628618e-05, "loss": 0.8422, "step": 5203 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6429828208250676e-05, "loss": 0.8664, "step": 5204 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6428300504425596e-05, "loss": 0.8946, "step": 5205 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6426772544871707e-05, "loss": 0.9646, "step": 5206 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6425244329649797e-05, "loss": 0.9568, "step": 5207 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6423715858820662e-05, "loss": 0.9222, "step": 5208 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6422187132445105e-05, "loss": 0.8976, "step": 5209 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6420658150583944e-05, "loss": 0.7917, "step": 5210 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6419128913297998e-05, "loss": 0.9429, "step": 5211 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6417599420648113e-05, "loss": 1.0287, "step": 5212 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6416069672695124e-05, "loss": 0.9766, "step": 5213 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6414539669499897e-05, "loss": 0.7878, "step": 5214 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.641300941112329e-05, "loss": 0.7732, "step": 5215 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.641147889762618e-05, "loss": 0.9282, "step": 5216 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.640994812906946e-05, "loss": 0.8571, "step": 5217 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6408417105514024e-05, "loss": 0.8019, "step": 5218 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6406885827020773e-05, "loss": 0.8152, "step": 5219 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.6405354293650627e-05, "loss": 0.8664, "step": 5220 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.640382250546452e-05, "loss": 0.8586, "step": 5221 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.640229046252338e-05, "loss": 0.9219, "step": 5222 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 1.640075816488816e-05, "loss": 0.7255, "step": 5223 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6399225612619814e-05, "loss": 0.6951, "step": 5224 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6397692805779307e-05, "loss": 0.7285, "step": 5225 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6396159744427623e-05, "loss": 0.8707, "step": 5226 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6394626428625748e-05, "loss": 0.9006, "step": 5227 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.639309285843468e-05, "loss": 0.7579, "step": 5228 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.639155903391542e-05, "loss": 0.939, "step": 5229 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6390024955129e-05, "loss": 0.7751, "step": 5230 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6388490622136437e-05, "loss": 0.7072, "step": 5231 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6386956034998775e-05, "loss": 0.75, "step": 5232 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6385421193777057e-05, "loss": 0.8208, "step": 5233 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.638388609853235e-05, "loss": 0.7516, "step": 5234 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6382350749325712e-05, "loss": 0.7694, "step": 5235 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.638081514621823e-05, "loss": 0.8754, "step": 5236 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6379279289270994e-05, "loss": 0.8534, "step": 5237 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6377743178545094e-05, "loss": 0.8891, "step": 5238 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.637620681410165e-05, "loss": 0.8886, "step": 5239 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6374670196001773e-05, "loss": 0.7378, "step": 5240 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6373133324306592e-05, "loss": 0.8499, "step": 5241 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6371596199077252e-05, "loss": 0.806, "step": 5242 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6370058820374906e-05, "loss": 0.9318, "step": 5243 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.63685211882607e-05, "loss": 0.8238, "step": 5244 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6366983302795815e-05, "loss": 0.8303, "step": 5245 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6365445164041426e-05, "loss": 0.9193, "step": 5246 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.636390677205872e-05, "loss": 0.8292, "step": 5247 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6362368126908904e-05, "loss": 0.7793, "step": 5248 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6360829228653187e-05, "loss": 0.7091, "step": 5249 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6359290077352784e-05, "loss": 0.7903, "step": 5250 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.635775067306893e-05, "loss": 0.8069, "step": 5251 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6356211015862858e-05, "loss": 0.7716, "step": 5252 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6354671105795826e-05, "loss": 0.8167, "step": 5253 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.635313094292909e-05, "loss": 0.7431, "step": 5254 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6351590527323923e-05, "loss": 0.8229, "step": 5255 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6350049859041606e-05, "loss": 0.8597, "step": 5256 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6348508938143422e-05, "loss": 0.8105, "step": 5257 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.634696776469068e-05, "loss": 0.7868, "step": 5258 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6345426338744687e-05, "loss": 0.7022, "step": 5259 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6343884660366765e-05, "loss": 0.8125, "step": 5260 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6342342729618242e-05, "loss": 0.8326, "step": 5261 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6340800546560462e-05, "loss": 0.9112, "step": 5262 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6339258111254772e-05, "loss": 0.7978, "step": 5263 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6337715423762536e-05, "loss": 0.8167, "step": 5264 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.633617248414512e-05, "loss": 0.8953, "step": 5265 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6334629292463913e-05, "loss": 0.9478, "step": 5266 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.63330858487803e-05, "loss": 0.9315, "step": 5267 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.633154215315568e-05, "loss": 0.8456, "step": 5268 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6329998205651464e-05, "loss": 0.8837, "step": 5269 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6328454006329083e-05, "loss": 0.884, "step": 5270 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6326909555249954e-05, "loss": 0.8702, "step": 5271 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.632536485247553e-05, "loss": 0.9967, "step": 5272 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.632381989806725e-05, "loss": 0.8826, "step": 5273 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6322274692086586e-05, "loss": 0.9029, "step": 5274 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6320729234595e-05, "loss": 0.9169, "step": 5275 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.631918352565398e-05, "loss": 0.9014, "step": 5276 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6317637565325014e-05, "loss": 0.7716, "step": 5277 }, { "epoch": 0.95, "grad_norm": 0.0, "learning_rate": 1.6316091353669605e-05, "loss": 0.7987, "step": 5278 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6314544890749258e-05, "loss": 0.7828, "step": 5279 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.63129981766255e-05, "loss": 0.8048, "step": 5280 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6311451211359857e-05, "loss": 0.7066, "step": 5281 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6309903995013875e-05, "loss": 0.8414, "step": 5282 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6308356527649106e-05, "loss": 0.7874, "step": 5283 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.63068088093271e-05, "loss": 0.6979, "step": 5284 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6305260840109444e-05, "loss": 0.7257, "step": 5285 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.63037126200577e-05, "loss": 0.7174, "step": 5286 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6302164149233476e-05, "loss": 0.7636, "step": 5287 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6300615427698362e-05, "loss": 0.8087, "step": 5288 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6299066455513978e-05, "loss": 0.7928, "step": 5289 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6297517232741937e-05, "loss": 0.9484, "step": 5290 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.629596775944387e-05, "loss": 0.7813, "step": 5291 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6294418035681422e-05, "loss": 0.7904, "step": 5292 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6292868061516242e-05, "loss": 0.9978, "step": 5293 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.629131783700999e-05, "loss": 0.8188, "step": 5294 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6289767362224337e-05, "loss": 0.7205, "step": 5295 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6288216637220963e-05, "loss": 0.8268, "step": 5296 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6286665662061557e-05, "loss": 0.7982, "step": 5297 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6285114436807826e-05, "loss": 0.9308, "step": 5298 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.628356296152147e-05, "loss": 0.796, "step": 5299 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6282011236264222e-05, "loss": 0.8467, "step": 5300 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.62804592610978e-05, "loss": 0.7384, "step": 5301 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.627890703608395e-05, "loss": 0.7536, "step": 5302 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6277354561284423e-05, "loss": 0.724, "step": 5303 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6275801836760976e-05, "loss": 0.7337, "step": 5304 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.627424886257538e-05, "loss": 0.9307, "step": 5305 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.627269563878942e-05, "loss": 0.7424, "step": 5306 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.627114216546488e-05, "loss": 0.7794, "step": 5307 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6269588442663563e-05, "loss": 0.7534, "step": 5308 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6268034470447273e-05, "loss": 0.6858, "step": 5309 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.626648024887784e-05, "loss": 0.8795, "step": 5310 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.626492577801708e-05, "loss": 0.7732, "step": 5311 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.626337105792685e-05, "loss": 0.7542, "step": 5312 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6261816088668982e-05, "loss": 0.6926, "step": 5313 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6260260870305345e-05, "loss": 0.7091, "step": 5314 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.625870540289781e-05, "loss": 0.8674, "step": 5315 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6257149686508245e-05, "loss": 0.7745, "step": 5316 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.625559372119855e-05, "loss": 0.7796, "step": 5317 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.625403750703062e-05, "loss": 0.6874, "step": 5318 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6252481044066366e-05, "loss": 0.9558, "step": 5319 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.62509243323677e-05, "loss": 0.8538, "step": 5320 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.624936737199656e-05, "loss": 0.8352, "step": 5321 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.624781016301488e-05, "loss": 0.7856, "step": 5322 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.624625270548461e-05, "loss": 0.8752, "step": 5323 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6244694999467704e-05, "loss": 0.7081, "step": 5324 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6243137045026135e-05, "loss": 0.9244, "step": 5325 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.624157884222188e-05, "loss": 0.7518, "step": 5326 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6240020391116923e-05, "loss": 0.9036, "step": 5327 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6238461691773265e-05, "loss": 0.732, "step": 5328 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6236902744252915e-05, "loss": 0.6647, "step": 5329 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.623534354861789e-05, "loss": 0.7898, "step": 5330 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6233784104930218e-05, "loss": 0.8855, "step": 5331 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.623222441325193e-05, "loss": 0.6812, "step": 5332 }, { "epoch": 0.96, "grad_norm": 0.0, "learning_rate": 1.6230664473645085e-05, "loss": 0.8373, "step": 5333 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6229104286171732e-05, "loss": 0.9031, "step": 5334 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6227543850893938e-05, "loss": 0.787, "step": 5335 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6225983167873782e-05, "loss": 0.8511, "step": 5336 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.622442223717335e-05, "loss": 0.6696, "step": 5337 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6222861058854736e-05, "loss": 0.8953, "step": 5338 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.622129963298005e-05, "loss": 0.79, "step": 5339 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.621973795961141e-05, "loss": 0.716, "step": 5340 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6218176038810935e-05, "loss": 0.7887, "step": 5341 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6216613870640767e-05, "loss": 0.7765, "step": 5342 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.621505145516305e-05, "loss": 0.835, "step": 5343 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6213488792439936e-05, "loss": 0.7734, "step": 5344 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6211925882533597e-05, "loss": 0.7929, "step": 5345 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6210362725506202e-05, "loss": 0.8232, "step": 5346 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.620879932141994e-05, "loss": 0.9235, "step": 5347 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6207235670337004e-05, "loss": 0.8011, "step": 5348 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6205671772319604e-05, "loss": 0.7772, "step": 5349 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6204107627429947e-05, "loss": 0.8536, "step": 5350 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.620254323573026e-05, "loss": 0.8431, "step": 5351 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6200978597282775e-05, "loss": 0.9389, "step": 5352 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6199413712149742e-05, "loss": 0.7736, "step": 5353 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.619784858039341e-05, "loss": 0.7428, "step": 5354 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6196283202076043e-05, "loss": 0.9465, "step": 5355 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.619471757725992e-05, "loss": 0.8035, "step": 5356 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6193151706007313e-05, "loss": 0.8845, "step": 5357 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6191585588380525e-05, "loss": 0.7953, "step": 5358 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6190019224441855e-05, "loss": 0.7909, "step": 5359 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6188452614253617e-05, "loss": 0.867, "step": 5360 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6186885757878133e-05, "loss": 0.9225, "step": 5361 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6185318655377735e-05, "loss": 0.7438, "step": 5362 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6183751306814763e-05, "loss": 0.9068, "step": 5363 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.618218371225157e-05, "loss": 0.7601, "step": 5364 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.618061587175052e-05, "loss": 0.8851, "step": 5365 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6179047785373984e-05, "loss": 0.8291, "step": 5366 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.617747945318434e-05, "loss": 0.7942, "step": 5367 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6175910875243982e-05, "loss": 0.8922, "step": 5368 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6174342051615305e-05, "loss": 0.9651, "step": 5369 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.617277298236073e-05, "loss": 0.887, "step": 5370 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6171203667542664e-05, "loss": 0.9171, "step": 5371 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6169634107223554e-05, "loss": 0.8204, "step": 5372 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6168064301465826e-05, "loss": 1.0001, "step": 5373 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.616649425033193e-05, "loss": 0.9037, "step": 5374 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6164923953884332e-05, "loss": 0.9163, "step": 5375 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6163353412185497e-05, "loss": 0.8135, "step": 5376 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6161782625297905e-05, "loss": 0.9449, "step": 5377 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6160211593284043e-05, "loss": 0.7697, "step": 5378 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6158640316206413e-05, "loss": 0.6941, "step": 5379 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.615706879412752e-05, "loss": 0.9911, "step": 5380 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6155497027109882e-05, "loss": 0.8478, "step": 5381 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.615392501521603e-05, "loss": 0.8238, "step": 5382 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6152352758508497e-05, "loss": 0.7052, "step": 5383 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.615078025704983e-05, "loss": 0.7622, "step": 5384 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6149207510902593e-05, "loss": 0.8644, "step": 5385 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.614763452012934e-05, "loss": 0.73, "step": 5386 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.6146061284792658e-05, "loss": 0.855, "step": 5387 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.614448780495513e-05, "loss": 0.8376, "step": 5388 }, { "epoch": 0.97, "grad_norm": 0.0, "learning_rate": 1.614291408067935e-05, "loss": 0.8155, "step": 5389 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6141340112027927e-05, "loss": 0.7329, "step": 5390 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.613976589906347e-05, "loss": 0.8602, "step": 5391 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6138191441848607e-05, "loss": 0.9757, "step": 5392 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6136616740445975e-05, "loss": 0.8259, "step": 5393 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6135041794918215e-05, "loss": 0.7995, "step": 5394 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6133466605327987e-05, "loss": 0.6566, "step": 5395 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6131891171737945e-05, "loss": 0.7904, "step": 5396 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.613031549421077e-05, "loss": 0.7029, "step": 5397 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.612873957280914e-05, "loss": 0.7908, "step": 5398 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6127163407595757e-05, "loss": 0.7041, "step": 5399 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.612558699863331e-05, "loss": 0.8, "step": 5400 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6124010345984523e-05, "loss": 0.8677, "step": 5401 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6122433449712113e-05, "loss": 0.7679, "step": 5402 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.612085630987881e-05, "loss": 0.747, "step": 5403 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.611927892654736e-05, "loss": 0.6806, "step": 5404 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6117701299780507e-05, "loss": 0.8584, "step": 5405 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6116123429641023e-05, "loss": 0.7811, "step": 5406 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.611454531619167e-05, "loss": 0.7545, "step": 5407 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6112966959495223e-05, "loss": 0.8315, "step": 5408 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6111388359614482e-05, "loss": 0.8779, "step": 5409 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6109809516612243e-05, "loss": 0.7709, "step": 5410 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6108230430551314e-05, "loss": 0.9002, "step": 5411 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6106651101494515e-05, "loss": 0.7438, "step": 5412 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6105071529504676e-05, "loss": 0.788, "step": 5413 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.610349171464463e-05, "loss": 0.8675, "step": 5414 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6101911656977232e-05, "loss": 0.8507, "step": 5415 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.610033135656533e-05, "loss": 0.7019, "step": 5416 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.60987508134718e-05, "loss": 0.7865, "step": 5417 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6097170027759513e-05, "loss": 0.8809, "step": 5418 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6095588999491355e-05, "loss": 0.9919, "step": 5419 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.609400772873023e-05, "loss": 0.6559, "step": 5420 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6092426215539032e-05, "loss": 0.7708, "step": 5421 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6090844459980685e-05, "loss": 0.8466, "step": 5422 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.608926246211811e-05, "loss": 0.7428, "step": 5423 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6087680222014244e-05, "loss": 0.7363, "step": 5424 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6086097739732033e-05, "loss": 0.7932, "step": 5425 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6084515015334423e-05, "loss": 0.7704, "step": 5426 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6082932048884385e-05, "loss": 0.8143, "step": 5427 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6081348840444887e-05, "loss": 0.8017, "step": 5428 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6079765390078915e-05, "loss": 0.7774, "step": 5429 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6078181697849462e-05, "loss": 0.823, "step": 5430 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.607659776381953e-05, "loss": 0.8179, "step": 5431 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6075013588052126e-05, "loss": 0.7769, "step": 5432 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6073429170610274e-05, "loss": 0.7941, "step": 5433 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6071844511557005e-05, "loss": 0.8843, "step": 5434 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6070259610955363e-05, "loss": 0.8879, "step": 5435 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6068674468868393e-05, "loss": 0.738, "step": 5436 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6067089085359156e-05, "loss": 0.7912, "step": 5437 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6065503460490724e-05, "loss": 0.8151, "step": 5438 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.606391759432617e-05, "loss": 0.7372, "step": 5439 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6062331486928587e-05, "loss": 0.8355, "step": 5440 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6060745138361077e-05, "loss": 0.7405, "step": 5441 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6059158548686736e-05, "loss": 0.8467, "step": 5442 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6057571717968688e-05, "loss": 0.8608, "step": 5443 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 1.6055984646270065e-05, "loss": 0.8382, "step": 5444 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6054397333653998e-05, "loss": 0.7683, "step": 5445 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6052809780183628e-05, "loss": 0.8238, "step": 5446 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.605122198592212e-05, "loss": 0.8355, "step": 5447 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6049633950932634e-05, "loss": 0.7562, "step": 5448 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6048045675278344e-05, "loss": 0.7858, "step": 5449 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.604645715902244e-05, "loss": 0.8481, "step": 5450 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.604486840222811e-05, "loss": 0.9408, "step": 5451 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.604327940495856e-05, "loss": 0.8933, "step": 5452 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6041690167277e-05, "loss": 0.8525, "step": 5453 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.604010068924666e-05, "loss": 0.7937, "step": 5454 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6038510970930763e-05, "loss": 0.8819, "step": 5455 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6036921012392556e-05, "loss": 0.9132, "step": 5456 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.603533081369529e-05, "loss": 0.757, "step": 5457 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6033740374902225e-05, "loss": 0.9908, "step": 5458 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6032149696076632e-05, "loss": 0.7886, "step": 5459 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.603055877728179e-05, "loss": 0.8433, "step": 5460 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.602896761858099e-05, "loss": 0.713, "step": 5461 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.602737622003753e-05, "loss": 0.9, "step": 5462 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6025784581714718e-05, "loss": 0.8762, "step": 5463 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6024192703675873e-05, "loss": 0.8491, "step": 5464 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.602260058598432e-05, "loss": 0.7726, "step": 5465 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6021008228703395e-05, "loss": 0.8622, "step": 5466 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6019415631896457e-05, "loss": 0.8117, "step": 5467 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6017822795626848e-05, "loss": 0.8421, "step": 5468 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.601622971995794e-05, "loss": 0.7487, "step": 5469 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.601463640495311e-05, "loss": 0.8391, "step": 5470 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6013042850675737e-05, "loss": 0.8297, "step": 5471 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.601144905718922e-05, "loss": 0.9337, "step": 5472 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.600985502455696e-05, "loss": 0.7873, "step": 5473 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6008260752842373e-05, "loss": 0.7519, "step": 5474 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6006666242108878e-05, "loss": 0.8748, "step": 5475 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.600507149241991e-05, "loss": 1.0315, "step": 5476 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.6003476503838912e-05, "loss": 0.8309, "step": 5477 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.600188127642934e-05, "loss": 0.8128, "step": 5478 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.600028581025464e-05, "loss": 0.9731, "step": 5479 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5998690105378294e-05, "loss": 0.6956, "step": 5480 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.599709416186378e-05, "loss": 0.8873, "step": 5481 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.599549797977459e-05, "loss": 0.7907, "step": 5482 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5993901559174213e-05, "loss": 0.7945, "step": 5483 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.599230490012617e-05, "loss": 0.8018, "step": 5484 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.599070800269397e-05, "loss": 0.7182, "step": 5485 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5989110866941142e-05, "loss": 0.9976, "step": 5486 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5987513492931225e-05, "loss": 0.7278, "step": 5487 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5985915880727762e-05, "loss": 0.7953, "step": 5488 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.598431803039431e-05, "loss": 0.7432, "step": 5489 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5982719941994442e-05, "loss": 0.745, "step": 5490 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.598112161559172e-05, "loss": 0.8929, "step": 5491 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5979523051249737e-05, "loss": 0.7486, "step": 5492 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5977924249032085e-05, "loss": 0.8168, "step": 5493 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5976325209002357e-05, "loss": 0.8454, "step": 5494 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5974725931224183e-05, "loss": 0.7413, "step": 5495 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5973126415761175e-05, "loss": 0.9092, "step": 5496 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.597152666267696e-05, "loss": 0.7634, "step": 5497 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.596992667203519e-05, "loss": 0.7179, "step": 5498 }, { "epoch": 0.99, "grad_norm": 0.0, "learning_rate": 1.5968326443899507e-05, "loss": 0.7191, "step": 5499 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5966725978333573e-05, "loss": 0.7887, "step": 5500 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5965125275401063e-05, "loss": 0.8331, "step": 5501 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5963524335165644e-05, "loss": 0.8465, "step": 5502 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5961923157691016e-05, "loss": 0.7685, "step": 5503 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.596032174304087e-05, "loss": 0.9658, "step": 5504 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5958720091278915e-05, "loss": 0.9773, "step": 5505 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5957118202468866e-05, "loss": 0.6908, "step": 5506 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.595551607667445e-05, "loss": 0.7415, "step": 5507 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.59539137139594e-05, "loss": 0.9863, "step": 5508 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5952311114387466e-05, "loss": 0.6466, "step": 5509 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5950708278022398e-05, "loss": 0.7939, "step": 5510 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5949105204927963e-05, "loss": 0.7992, "step": 5511 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.594750189516793e-05, "loss": 0.7201, "step": 5512 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5945898348806078e-05, "loss": 0.909, "step": 5513 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5944294565906208e-05, "loss": 0.8021, "step": 5514 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5942690546532117e-05, "loss": 0.7752, "step": 5515 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5941086290747617e-05, "loss": 0.8727, "step": 5516 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5939481798616524e-05, "loss": 0.7017, "step": 5517 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5937877070202672e-05, "loss": 0.7633, "step": 5518 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5936272105569896e-05, "loss": 0.819, "step": 5519 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.593466690478205e-05, "loss": 0.8278, "step": 5520 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5933061467902984e-05, "loss": 0.7487, "step": 5521 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.593145579499657e-05, "loss": 0.8488, "step": 5522 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5929849886126684e-05, "loss": 0.8479, "step": 5523 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5928243741357214e-05, "loss": 0.773, "step": 5524 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5926637360752046e-05, "loss": 0.8495, "step": 5525 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5925030744375095e-05, "loss": 0.786, "step": 5526 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.592342389229027e-05, "loss": 0.8513, "step": 5527 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5921816804561493e-05, "loss": 0.6779, "step": 5528 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.59202094812527e-05, "loss": 0.7001, "step": 5529 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5918601922427838e-05, "loss": 0.6244, "step": 5530 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5916994128150847e-05, "loss": 0.693, "step": 5531 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.591538609848569e-05, "loss": 0.6926, "step": 5532 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5913777833496348e-05, "loss": 0.6046, "step": 5533 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.591216933324679e-05, "loss": 0.7101, "step": 5534 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5910560597801004e-05, "loss": 0.7357, "step": 5535 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5908951627222994e-05, "loss": 0.6635, "step": 5536 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5907342421576766e-05, "loss": 0.6701, "step": 5537 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.590573298092634e-05, "loss": 0.6591, "step": 5538 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5904123305335735e-05, "loss": 0.6063, "step": 5539 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.590251339486899e-05, "loss": 0.681, "step": 5540 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5900903249590154e-05, "loss": 0.8387, "step": 5541 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5899292869563275e-05, "loss": 0.6356, "step": 5542 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.589768225485242e-05, "loss": 0.6408, "step": 5543 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.589607140552166e-05, "loss": 0.6066, "step": 5544 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5894460321635085e-05, "loss": 0.6586, "step": 5545 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.589284900325678e-05, "loss": 0.7675, "step": 5546 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.589123745045084e-05, "loss": 0.6923, "step": 5547 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5889625663281383e-05, "loss": 0.6905, "step": 5548 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.588801364181253e-05, "loss": 0.7494, "step": 5549 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.588640138610841e-05, "loss": 0.7677, "step": 5550 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.588478889623316e-05, "loss": 0.625, "step": 5551 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.588317617225092e-05, "loss": 0.7073, "step": 5552 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5881563214225857e-05, "loss": 0.5953, "step": 5553 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 1.5879950022222133e-05, "loss": 0.677, "step": 5554 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5878336596303926e-05, "loss": 0.7219, "step": 5555 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5876722936535416e-05, "loss": 0.6151, "step": 5556 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.58751090429808e-05, "loss": 0.6482, "step": 5557 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5873494915704287e-05, "loss": 0.6003, "step": 5558 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.587188055477008e-05, "loss": 0.7525, "step": 5559 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5870265960242404e-05, "loss": 0.6146, "step": 5560 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5868651132185497e-05, "loss": 0.6668, "step": 5561 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.586703607066359e-05, "loss": 0.689, "step": 5562 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5865420775740936e-05, "loss": 0.6328, "step": 5563 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.58638052474818e-05, "loss": 0.7778, "step": 5564 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5862189485950444e-05, "loss": 0.7431, "step": 5565 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5860573491211148e-05, "loss": 0.6519, "step": 5566 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.58589572633282e-05, "loss": 0.6667, "step": 5567 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5857340802365892e-05, "loss": 0.6558, "step": 5568 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.585572410838854e-05, "loss": 0.6949, "step": 5569 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.585410718146044e-05, "loss": 0.7397, "step": 5570 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5852490021645938e-05, "loss": 0.5532, "step": 5571 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.585087262900935e-05, "loss": 0.6536, "step": 5572 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5849255003615033e-05, "loss": 0.6416, "step": 5573 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5847637145527323e-05, "loss": 0.67, "step": 5574 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5846019054810596e-05, "loss": 0.7279, "step": 5575 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5844400731529214e-05, "loss": 0.667, "step": 5576 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.584278217574756e-05, "loss": 0.7701, "step": 5577 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.584116338753002e-05, "loss": 0.5939, "step": 5578 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5839544366940995e-05, "loss": 0.6916, "step": 5579 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.583792511404489e-05, "loss": 0.6085, "step": 5580 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5836305628906124e-05, "loss": 0.6206, "step": 5581 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.583468591158912e-05, "loss": 0.5571, "step": 5582 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.583306596215832e-05, "loss": 0.6497, "step": 5583 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5831445780678164e-05, "loss": 0.7221, "step": 5584 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.58298253672131e-05, "loss": 0.6763, "step": 5585 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5828204721827596e-05, "loss": 0.5662, "step": 5586 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5826583844586125e-05, "loss": 0.6951, "step": 5587 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.582496273555317e-05, "loss": 0.7389, "step": 5588 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5823341394793214e-05, "loss": 0.7986, "step": 5589 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5821719822370763e-05, "loss": 0.6935, "step": 5590 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5820098018350324e-05, "loss": 0.6081, "step": 5591 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.581847598279642e-05, "loss": 0.6018, "step": 5592 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.581685371577357e-05, "loss": 0.5984, "step": 5593 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5815231217346315e-05, "loss": 0.6537, "step": 5594 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5813608487579197e-05, "loss": 0.5876, "step": 5595 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.581198552653678e-05, "loss": 0.6837, "step": 5596 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.581036233428362e-05, "loss": 0.6604, "step": 5597 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.580873891088429e-05, "loss": 0.7157, "step": 5598 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.580711525640338e-05, "loss": 0.7063, "step": 5599 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.580549137090547e-05, "loss": 0.7058, "step": 5600 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5803867254455175e-05, "loss": 0.5536, "step": 5601 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5802242907117095e-05, "loss": 0.5746, "step": 5602 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.580061832895585e-05, "loss": 0.7495, "step": 5603 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5798993520036074e-05, "loss": 0.6834, "step": 5604 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.57973684804224e-05, "loss": 0.5406, "step": 5605 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.579574321017948e-05, "loss": 0.5288, "step": 5606 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.579411770937196e-05, "loss": 0.5872, "step": 5607 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5792491978064516e-05, "loss": 0.7469, "step": 5608 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.579086601632182e-05, "loss": 0.6913, "step": 5609 }, { "epoch": 1.01, "grad_norm": 0.0, "learning_rate": 1.5789239824208548e-05, "loss": 0.699, "step": 5610 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5787613401789404e-05, "loss": 0.8165, "step": 5611 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5785986749129077e-05, "loss": 0.6407, "step": 5612 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.578435986629229e-05, "loss": 0.6377, "step": 5613 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.578273275334376e-05, "loss": 0.6525, "step": 5614 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5781105410348212e-05, "loss": 0.6467, "step": 5615 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5779477837370386e-05, "loss": 0.6604, "step": 5616 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5777850034475034e-05, "loss": 0.5884, "step": 5617 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5776222001726907e-05, "loss": 0.6277, "step": 5618 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5774593739190776e-05, "loss": 0.6658, "step": 5619 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5772965246931413e-05, "loss": 0.5059, "step": 5620 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.57713365250136e-05, "loss": 0.6396, "step": 5621 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5769707573502134e-05, "loss": 0.693, "step": 5622 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5768078392461822e-05, "loss": 0.6487, "step": 5623 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5766448981957462e-05, "loss": 0.5986, "step": 5624 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.576481934205389e-05, "loss": 0.6133, "step": 5625 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5763189472815924e-05, "loss": 0.5808, "step": 5626 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.576155937430841e-05, "loss": 0.5769, "step": 5627 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5759929046596198e-05, "loss": 0.5733, "step": 5628 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5758298489744136e-05, "loss": 0.6197, "step": 5629 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5756667703817095e-05, "loss": 0.6347, "step": 5630 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5755036688879952e-05, "loss": 0.7057, "step": 5631 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5753405444997593e-05, "loss": 0.5845, "step": 5632 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5751773972234907e-05, "loss": 0.5266, "step": 5633 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.57501422706568e-05, "loss": 0.5328, "step": 5634 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5748510340328183e-05, "loss": 0.587, "step": 5635 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5746878181313975e-05, "loss": 0.7345, "step": 5636 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.574524579367911e-05, "loss": 0.6968, "step": 5637 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.574361317748853e-05, "loss": 0.5753, "step": 5638 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.574198033280717e-05, "loss": 0.6786, "step": 5639 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5740347259699998e-05, "loss": 0.6734, "step": 5640 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.573871395823198e-05, "loss": 0.6607, "step": 5641 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5737080428468087e-05, "loss": 0.6165, "step": 5642 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5735446670473308e-05, "loss": 0.6088, "step": 5643 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.573381268431264e-05, "loss": 0.6449, "step": 5644 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5732178470051076e-05, "loss": 0.6185, "step": 5645 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5730544027753632e-05, "loss": 0.621, "step": 5646 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.572890935748533e-05, "loss": 0.7032, "step": 5647 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.57272744593112e-05, "loss": 0.6544, "step": 5648 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.572563933329628e-05, "loss": 0.659, "step": 5649 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.572400397950562e-05, "loss": 0.6226, "step": 5650 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5722368398004278e-05, "loss": 0.6495, "step": 5651 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5720732588857316e-05, "loss": 0.6341, "step": 5652 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.571909655212981e-05, "loss": 0.58, "step": 5653 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5717460287886845e-05, "loss": 0.6995, "step": 5654 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5715823796193517e-05, "loss": 0.6275, "step": 5655 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.571418707711493e-05, "loss": 0.5384, "step": 5656 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.571255013071619e-05, "loss": 0.5855, "step": 5657 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5710912957062417e-05, "loss": 0.8417, "step": 5658 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5709275556218744e-05, "loss": 0.6892, "step": 5659 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.570763792825031e-05, "loss": 0.6929, "step": 5660 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5706000073222263e-05, "loss": 0.676, "step": 5661 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5704361991199756e-05, "loss": 0.5865, "step": 5662 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.5702723682247957e-05, "loss": 0.6024, "step": 5663 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.570108514643204e-05, "loss": 0.6211, "step": 5664 }, { "epoch": 1.02, "grad_norm": 0.0, "learning_rate": 1.569944638381719e-05, "loss": 0.6184, "step": 5665 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.56978073944686e-05, "loss": 0.7238, "step": 5666 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.569616817845147e-05, "loss": 0.6842, "step": 5667 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5694528735831013e-05, "loss": 0.6868, "step": 5668 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5692889066672447e-05, "loss": 0.556, "step": 5669 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5691249171040998e-05, "loss": 0.596, "step": 5670 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5689609049001914e-05, "loss": 0.7349, "step": 5671 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.568796870062043e-05, "loss": 0.6791, "step": 5672 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5686328125961808e-05, "loss": 0.6135, "step": 5673 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5684687325091313e-05, "loss": 0.5954, "step": 5674 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.568304629807422e-05, "loss": 0.6316, "step": 5675 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.568140504497581e-05, "loss": 0.6349, "step": 5676 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5679763565861367e-05, "loss": 0.673, "step": 5677 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.56781218607962e-05, "loss": 0.6489, "step": 5678 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5676479929845628e-05, "loss": 0.6447, "step": 5679 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5674837773074956e-05, "loss": 0.739, "step": 5680 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.567319539054951e-05, "loss": 0.6363, "step": 5681 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.567155278233464e-05, "loss": 0.6216, "step": 5682 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5669909948495682e-05, "loss": 0.5243, "step": 5683 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.566826688909799e-05, "loss": 0.619, "step": 5684 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5666623604206934e-05, "loss": 0.6166, "step": 5685 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.566498009388788e-05, "loss": 0.6199, "step": 5686 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5663336358206217e-05, "loss": 0.6575, "step": 5687 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5661692397227333e-05, "loss": 0.6483, "step": 5688 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5660048211016623e-05, "loss": 0.654, "step": 5689 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5658403799639503e-05, "loss": 0.636, "step": 5690 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5656759163161385e-05, "loss": 0.6298, "step": 5691 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5655114301647694e-05, "loss": 0.6538, "step": 5692 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.565346921516387e-05, "loss": 0.5898, "step": 5693 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5651823903775353e-05, "loss": 0.6068, "step": 5694 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5650178367547604e-05, "loss": 0.6576, "step": 5695 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5648532606546075e-05, "loss": 0.678, "step": 5696 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5646886620836247e-05, "loss": 0.6166, "step": 5697 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5645240410483594e-05, "loss": 0.6134, "step": 5698 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5643593975553607e-05, "loss": 0.5654, "step": 5699 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5641947316111787e-05, "loss": 0.6097, "step": 5700 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.564030043222363e-05, "loss": 0.6632, "step": 5701 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5638653323954664e-05, "loss": 0.5973, "step": 5702 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5637005991370407e-05, "loss": 0.7069, "step": 5703 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5635358434536397e-05, "loss": 0.709, "step": 5704 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5633710653518174e-05, "loss": 0.7634, "step": 5705 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.563206264838129e-05, "loss": 0.6618, "step": 5706 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5630414419191307e-05, "loss": 0.6082, "step": 5707 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.562876596601379e-05, "loss": 0.6174, "step": 5708 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5627117288914324e-05, "loss": 0.649, "step": 5709 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5625468387958492e-05, "loss": 0.6693, "step": 5710 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5623819263211886e-05, "loss": 0.6031, "step": 5711 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.562216991474012e-05, "loss": 0.6811, "step": 5712 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.56205203426088e-05, "loss": 0.6773, "step": 5713 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5618870546883555e-05, "loss": 0.6739, "step": 5714 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.561722052763001e-05, "loss": 0.7393, "step": 5715 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5615570284913814e-05, "loss": 0.6567, "step": 5716 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5613919818800612e-05, "loss": 0.7265, "step": 5717 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5612269129356064e-05, "loss": 0.7117, "step": 5718 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5610618216645832e-05, "loss": 0.7068, "step": 5719 }, { "epoch": 1.03, "grad_norm": 0.0, "learning_rate": 1.5608967080735595e-05, "loss": 0.6084, "step": 5720 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5607315721691044e-05, "loss": 0.6127, "step": 5721 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5605664139577864e-05, "loss": 0.6204, "step": 5722 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.560401233446176e-05, "loss": 0.7005, "step": 5723 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5602360306408448e-05, "loss": 0.5791, "step": 5724 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5600708055483643e-05, "loss": 0.6216, "step": 5725 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.559905558175308e-05, "loss": 0.642, "step": 5726 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.559740288528249e-05, "loss": 0.5796, "step": 5727 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5595749966137632e-05, "loss": 0.6417, "step": 5728 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5594096824384248e-05, "loss": 0.6063, "step": 5729 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.559244346008811e-05, "loss": 0.6654, "step": 5730 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5590789873314994e-05, "loss": 0.6035, "step": 5731 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5589136064130675e-05, "loss": 0.6192, "step": 5732 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.558748203260095e-05, "loss": 0.5721, "step": 5733 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.558582777879162e-05, "loss": 0.6071, "step": 5734 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.558417330276849e-05, "loss": 0.5721, "step": 5735 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5582518604597377e-05, "loss": 0.652, "step": 5736 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5580863684344116e-05, "loss": 0.7835, "step": 5737 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5579208542074534e-05, "loss": 0.6116, "step": 5738 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5577553177854482e-05, "loss": 0.6171, "step": 5739 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5575897591749803e-05, "loss": 0.7113, "step": 5740 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.557424178382637e-05, "loss": 0.6139, "step": 5741 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5572585754150048e-05, "loss": 0.6175, "step": 5742 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5570929502786722e-05, "loss": 0.6242, "step": 5743 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.556927302980227e-05, "loss": 0.679, "step": 5744 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5567616335262603e-05, "loss": 0.8033, "step": 5745 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.556595941923362e-05, "loss": 0.6688, "step": 5746 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5564302281781232e-05, "loss": 0.6065, "step": 5747 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.556264492297137e-05, "loss": 0.8201, "step": 5748 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5560987342869962e-05, "loss": 0.6828, "step": 5749 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.555932954154295e-05, "loss": 0.5054, "step": 5750 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.555767151905629e-05, "loss": 0.6085, "step": 5751 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.555601327547593e-05, "loss": 0.6372, "step": 5752 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5554354810867848e-05, "loss": 0.6104, "step": 5753 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5552696125298016e-05, "loss": 0.5487, "step": 5754 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.555103721883242e-05, "loss": 0.7007, "step": 5755 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5549378091537052e-05, "loss": 0.6604, "step": 5756 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.554771874347792e-05, "loss": 0.6888, "step": 5757 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.554605917472103e-05, "loss": 0.5985, "step": 5758 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.554439938533241e-05, "loss": 0.6276, "step": 5759 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5542739375378076e-05, "loss": 0.601, "step": 5760 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.554107914492408e-05, "loss": 0.638, "step": 5761 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.553941869403646e-05, "loss": 0.6221, "step": 5762 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.553775802278128e-05, "loss": 0.5935, "step": 5763 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5536097131224594e-05, "loss": 0.6889, "step": 5764 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.553443601943248e-05, "loss": 0.7576, "step": 5765 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5532774687471026e-05, "loss": 0.6569, "step": 5766 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5531113135406314e-05, "loss": 0.7773, "step": 5767 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5529451363304442e-05, "loss": 0.7611, "step": 5768 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5527789371231526e-05, "loss": 0.6495, "step": 5769 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5526127159253677e-05, "loss": 0.6982, "step": 5770 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5524464727437026e-05, "loss": 0.713, "step": 5771 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5522802075847706e-05, "loss": 0.6947, "step": 5772 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5521139204551853e-05, "loss": 0.6329, "step": 5773 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5519476113615626e-05, "loss": 0.6164, "step": 5774 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 1.5517812803105184e-05, "loss": 0.6722, "step": 5775 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5516149273086695e-05, "loss": 0.6591, "step": 5776 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.551448552362634e-05, "loss": 0.6489, "step": 5777 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5512821554790306e-05, "loss": 0.5871, "step": 5778 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.551115736664478e-05, "loss": 0.7276, "step": 5779 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.550949295925598e-05, "loss": 0.6378, "step": 5780 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.550782833269011e-05, "loss": 0.6974, "step": 5781 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.550616348701339e-05, "loss": 0.697, "step": 5782 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5504498422292055e-05, "loss": 0.6306, "step": 5783 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5502833138592344e-05, "loss": 0.733, "step": 5784 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.550116763598051e-05, "loss": 0.6685, "step": 5785 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5499501914522793e-05, "loss": 0.6939, "step": 5786 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5497835974285473e-05, "loss": 0.7209, "step": 5787 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.549616981533482e-05, "loss": 0.7514, "step": 5788 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5494503437737117e-05, "loss": 0.7417, "step": 5789 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.549283684155865e-05, "loss": 0.5477, "step": 5790 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.549117002686573e-05, "loss": 0.6766, "step": 5791 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5489502993724658e-05, "loss": 0.6846, "step": 5792 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5487835742201748e-05, "loss": 0.608, "step": 5793 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.548616827236334e-05, "loss": 0.7148, "step": 5794 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5484500584275752e-05, "loss": 0.6115, "step": 5795 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.548283267800534e-05, "loss": 0.6685, "step": 5796 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5481164553618446e-05, "loss": 0.7318, "step": 5797 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5479496211181437e-05, "loss": 0.65, "step": 5798 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5477827650760685e-05, "loss": 0.6409, "step": 5799 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5476158872422568e-05, "loss": 0.645, "step": 5800 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5474489876233465e-05, "loss": 0.6601, "step": 5801 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5472820662259774e-05, "loss": 0.6114, "step": 5802 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5471151230567906e-05, "loss": 0.7879, "step": 5803 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5469481581224274e-05, "loss": 0.575, "step": 5804 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5467811714295286e-05, "loss": 0.5815, "step": 5805 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5466141629847384e-05, "loss": 0.6807, "step": 5806 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5464471327947005e-05, "loss": 0.7756, "step": 5807 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5462800808660597e-05, "loss": 0.7043, "step": 5808 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.546113007205461e-05, "loss": 0.6833, "step": 5809 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5459459118195517e-05, "loss": 0.5506, "step": 5810 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5457787947149786e-05, "loss": 0.6013, "step": 5811 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5456116558983903e-05, "loss": 0.6497, "step": 5812 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5454444953764353e-05, "loss": 0.6428, "step": 5813 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5452773131557642e-05, "loss": 0.5531, "step": 5814 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5451101092430273e-05, "loss": 0.6339, "step": 5815 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5449428836448764e-05, "loss": 0.5954, "step": 5816 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.544775636367964e-05, "loss": 0.7011, "step": 5817 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5446083674189435e-05, "loss": 0.7003, "step": 5818 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5444410768044694e-05, "loss": 0.6666, "step": 5819 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5442737645311968e-05, "loss": 0.6735, "step": 5820 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.544106430605781e-05, "loss": 0.6951, "step": 5821 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5439390750348794e-05, "loss": 0.6387, "step": 5822 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5437716978251497e-05, "loss": 0.5725, "step": 5823 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.54360429898325e-05, "loss": 0.7214, "step": 5824 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5434368785158405e-05, "loss": 0.5851, "step": 5825 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5432694364295805e-05, "loss": 0.6016, "step": 5826 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5431019727311322e-05, "loss": 0.7286, "step": 5827 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5429344874271567e-05, "loss": 0.6381, "step": 5828 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5427669805243174e-05, "loss": 0.6459, "step": 5829 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5425994520292778e-05, "loss": 0.6728, "step": 5830 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 1.5424319019487022e-05, "loss": 0.5715, "step": 5831 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5422643302892563e-05, "loss": 0.6347, "step": 5832 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5420967370576064e-05, "loss": 0.7485, "step": 5833 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.54192912226042e-05, "loss": 0.5638, "step": 5834 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5417614859043645e-05, "loss": 0.6359, "step": 5835 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5415938279961092e-05, "loss": 0.5477, "step": 5836 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5414261485423234e-05, "loss": 0.6099, "step": 5837 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.541258447549678e-05, "loss": 0.6381, "step": 5838 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5410907250248445e-05, "loss": 0.775, "step": 5839 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5409229809744945e-05, "loss": 0.7168, "step": 5840 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.540755215405302e-05, "loss": 0.6323, "step": 5841 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5405874283239405e-05, "loss": 0.5918, "step": 5842 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5404196197370853e-05, "loss": 0.5944, "step": 5843 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5402517896514115e-05, "loss": 0.6685, "step": 5844 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5400839380735963e-05, "loss": 0.6432, "step": 5845 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5399160650103168e-05, "loss": 0.7188, "step": 5846 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5397481704682512e-05, "loss": 0.4955, "step": 5847 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5395802544540786e-05, "loss": 0.7325, "step": 5848 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.539412316974479e-05, "loss": 0.6796, "step": 5849 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.539244358036134e-05, "loss": 0.6454, "step": 5850 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.539076377645724e-05, "loss": 0.7058, "step": 5851 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5389083758099328e-05, "loss": 0.6346, "step": 5852 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.538740352535443e-05, "loss": 0.6131, "step": 5853 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.538572307828939e-05, "loss": 0.7138, "step": 5854 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.538404241697106e-05, "loss": 0.7068, "step": 5855 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5382361541466296e-05, "loss": 0.5442, "step": 5856 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5380680451841972e-05, "loss": 0.6035, "step": 5857 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5378999148164965e-05, "loss": 0.7361, "step": 5858 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5377317630502157e-05, "loss": 0.6515, "step": 5859 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5375635898920432e-05, "loss": 0.6834, "step": 5860 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5373953953486713e-05, "loss": 0.6159, "step": 5861 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5372271794267896e-05, "loss": 0.7411, "step": 5862 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5370589421330903e-05, "loss": 0.5934, "step": 5863 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5368906834742666e-05, "loss": 0.666, "step": 5864 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.536722403457011e-05, "loss": 0.5814, "step": 5865 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5365541020880195e-05, "loss": 0.5217, "step": 5866 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5363857793739866e-05, "loss": 0.7171, "step": 5867 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.536217435321608e-05, "loss": 0.5696, "step": 5868 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5360490699375815e-05, "loss": 0.734, "step": 5869 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.535880683228605e-05, "loss": 0.5358, "step": 5870 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5357122752013763e-05, "loss": 0.6167, "step": 5871 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.535543845862596e-05, "loss": 0.6147, "step": 5872 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5353753952189638e-05, "loss": 0.6999, "step": 5873 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5352069232771817e-05, "loss": 0.624, "step": 5874 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.535038430043951e-05, "loss": 0.6941, "step": 5875 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5348699155259748e-05, "loss": 0.6848, "step": 5876 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5347013797299573e-05, "loss": 0.7084, "step": 5877 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.534532822662603e-05, "loss": 0.6675, "step": 5878 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.534364244330617e-05, "loss": 0.5833, "step": 5879 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.534195644740706e-05, "loss": 0.5512, "step": 5880 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5340270238995775e-05, "loss": 0.6752, "step": 5881 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.533858381813939e-05, "loss": 0.6593, "step": 5882 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5336897184904996e-05, "loss": 0.7051, "step": 5883 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5335210339359684e-05, "loss": 0.7194, "step": 5884 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.533352328157057e-05, "loss": 0.5871, "step": 5885 }, { "epoch": 1.06, "grad_norm": 0.0, "learning_rate": 1.5331836011604764e-05, "loss": 0.6262, "step": 5886 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.533014852952938e-05, "loss": 0.6609, "step": 5887 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5328460835411565e-05, "loss": 0.7157, "step": 5888 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5326772929318448e-05, "loss": 0.6404, "step": 5889 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5325084811317176e-05, "loss": 0.6715, "step": 5890 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5323396481474912e-05, "loss": 0.657, "step": 5891 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.532170793985881e-05, "loss": 0.653, "step": 5892 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5320019186536056e-05, "loss": 0.6129, "step": 5893 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.531833022157382e-05, "loss": 0.6635, "step": 5894 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5316641045039297e-05, "loss": 0.6897, "step": 5895 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.531495165699969e-05, "loss": 0.7683, "step": 5896 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5313262057522198e-05, "loss": 0.6509, "step": 5897 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.531157224667404e-05, "loss": 0.6096, "step": 5898 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.530988222452244e-05, "loss": 0.6521, "step": 5899 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5308191991134623e-05, "loss": 0.6539, "step": 5900 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.530650154657784e-05, "loss": 0.6445, "step": 5901 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.530481089091934e-05, "loss": 0.7229, "step": 5902 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5303120024226366e-05, "loss": 0.6133, "step": 5903 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5301428946566195e-05, "loss": 0.7081, "step": 5904 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5299737658006103e-05, "loss": 0.6534, "step": 5905 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.529804615861336e-05, "loss": 0.6747, "step": 5906 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5296354448455274e-05, "loss": 0.6377, "step": 5907 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.529466252759913e-05, "loss": 0.7998, "step": 5908 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.529297039611224e-05, "loss": 0.6214, "step": 5909 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5291278054061922e-05, "loss": 0.5897, "step": 5910 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5289585501515495e-05, "loss": 0.6079, "step": 5911 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.52878927385403e-05, "loss": 0.59, "step": 5912 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.528619976520367e-05, "loss": 0.711, "step": 5913 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.528450658157296e-05, "loss": 0.6032, "step": 5914 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5282813187715523e-05, "loss": 0.6021, "step": 5915 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.528111958369873e-05, "loss": 0.6679, "step": 5916 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.527942576958995e-05, "loss": 0.6794, "step": 5917 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.527773174545657e-05, "loss": 0.6521, "step": 5918 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.527603751136598e-05, "loss": 0.72, "step": 5919 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.527434306738558e-05, "loss": 0.6422, "step": 5920 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5272648413582778e-05, "loss": 0.718, "step": 5921 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5270953550024986e-05, "loss": 0.6067, "step": 5922 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5269258476779635e-05, "loss": 0.6379, "step": 5923 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5267563193914154e-05, "loss": 0.6093, "step": 5924 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5265867701495986e-05, "loss": 0.6531, "step": 5925 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.526417199959258e-05, "loss": 0.6551, "step": 5926 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5262476088271393e-05, "loss": 0.6631, "step": 5927 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5260779967599892e-05, "loss": 0.6102, "step": 5928 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5259083637645557e-05, "loss": 0.7267, "step": 5929 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5257387098475857e-05, "loss": 0.6631, "step": 5930 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5255690350158297e-05, "loss": 0.6338, "step": 5931 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.525399339276037e-05, "loss": 0.629, "step": 5932 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5252296226349582e-05, "loss": 0.69, "step": 5933 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5250598850993459e-05, "loss": 0.7553, "step": 5934 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5248901266759517e-05, "loss": 0.676, "step": 5935 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5247203473715289e-05, "loss": 0.5445, "step": 5936 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5245505471928317e-05, "loss": 0.6306, "step": 5937 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5243807261466152e-05, "loss": 0.695, "step": 5938 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5242108842396351e-05, "loss": 0.6792, "step": 5939 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5240410214786483e-05, "loss": 0.5367, "step": 5940 }, { "epoch": 1.07, "grad_norm": 0.0, "learning_rate": 1.5238711378704115e-05, "loss": 0.5878, "step": 5941 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5237012334216837e-05, "loss": 0.7895, "step": 5942 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5235313081392236e-05, "loss": 0.5794, "step": 5943 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.523361362029791e-05, "loss": 0.6754, "step": 5944 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.523191395100147e-05, "loss": 0.7165, "step": 5945 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5230214073570531e-05, "loss": 0.5706, "step": 5946 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5228513988072716e-05, "loss": 0.7454, "step": 5947 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.522681369457566e-05, "loss": 0.638, "step": 5948 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5225113193146997e-05, "loss": 0.7308, "step": 5949 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5223412483854381e-05, "loss": 0.5477, "step": 5950 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.522171156676547e-05, "loss": 0.58, "step": 5951 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5220010441947925e-05, "loss": 0.556, "step": 5952 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5218309109469424e-05, "loss": 0.7123, "step": 5953 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5216607569397646e-05, "loss": 0.803, "step": 5954 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5214905821800286e-05, "loss": 0.5871, "step": 5955 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5213203866745034e-05, "loss": 0.6628, "step": 5956 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5211501704299603e-05, "loss": 0.6235, "step": 5957 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5209799334531707e-05, "loss": 0.6412, "step": 5958 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5208096757509068e-05, "loss": 0.6234, "step": 5959 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.520639397329942e-05, "loss": 0.6947, "step": 5960 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.52046909819705e-05, "loss": 0.497, "step": 5961 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5202987783590055e-05, "loss": 0.6335, "step": 5962 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5201284378225843e-05, "loss": 0.5818, "step": 5963 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5199580765945629e-05, "loss": 0.7402, "step": 5964 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5197876946817182e-05, "loss": 0.5465, "step": 5965 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.519617292090829e-05, "loss": 0.7177, "step": 5966 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.519446868828674e-05, "loss": 0.6173, "step": 5967 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.519276424902032e-05, "loss": 0.6212, "step": 5968 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5191059603176843e-05, "loss": 0.6703, "step": 5969 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5189354750824126e-05, "loss": 0.6658, "step": 5970 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5187649692029984e-05, "loss": 0.56, "step": 5971 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5185944426862254e-05, "loss": 0.6754, "step": 5972 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5184238955388766e-05, "loss": 0.6213, "step": 5973 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5182533277677376e-05, "loss": 0.5622, "step": 5974 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5180827393795932e-05, "loss": 0.5661, "step": 5975 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5179121303812296e-05, "loss": 0.6003, "step": 5976 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5177415007794345e-05, "loss": 0.608, "step": 5977 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5175708505809956e-05, "loss": 0.7192, "step": 5978 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5174001797927015e-05, "loss": 0.6654, "step": 5979 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.517229488421342e-05, "loss": 0.6046, "step": 5980 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5170587764737077e-05, "loss": 0.6482, "step": 5981 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.516888043956589e-05, "loss": 0.5943, "step": 5982 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5167172908767786e-05, "loss": 0.7193, "step": 5983 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5165465172410692e-05, "loss": 0.6428, "step": 5984 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5163757230562542e-05, "loss": 0.6621, "step": 5985 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5162049083291288e-05, "loss": 0.7222, "step": 5986 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5160340730664875e-05, "loss": 0.6387, "step": 5987 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5158632172751268e-05, "loss": 0.6306, "step": 5988 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5156923409618438e-05, "loss": 0.6695, "step": 5989 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5155214441334359e-05, "loss": 0.675, "step": 5990 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5153505267967016e-05, "loss": 0.7511, "step": 5991 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5151795889584407e-05, "loss": 0.6981, "step": 5992 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5150086306254534e-05, "loss": 0.6319, "step": 5993 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5148376518045405e-05, "loss": 0.6786, "step": 5994 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5146666525025033e-05, "loss": 0.7117, "step": 5995 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5144956327261455e-05, "loss": 0.6808, "step": 5996 }, { "epoch": 1.08, "grad_norm": 0.0, "learning_rate": 1.5143245924822699e-05, "loss": 0.6443, "step": 5997 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.514153531777681e-05, "loss": 0.6469, "step": 5998 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5139824506191839e-05, "loss": 0.6784, "step": 5999 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5138113490135839e-05, "loss": 0.6528, "step": 6000 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5136402269676888e-05, "loss": 0.7069, "step": 6001 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5134690844883056e-05, "loss": 0.6052, "step": 6002 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.513297921582242e-05, "loss": 0.6615, "step": 6003 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5131267382563085e-05, "loss": 0.664, "step": 6004 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.512955534517314e-05, "loss": 0.6363, "step": 6005 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5127843103720689e-05, "loss": 0.6429, "step": 6006 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5126130658273863e-05, "loss": 0.6657, "step": 6007 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5124418008900776e-05, "loss": 0.7676, "step": 6008 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5122705155669564e-05, "loss": 0.6164, "step": 6009 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5120992098648362e-05, "loss": 0.7132, "step": 6010 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.511927883790532e-05, "loss": 0.5819, "step": 6011 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.51175653735086e-05, "loss": 0.57, "step": 6012 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5115851705526362e-05, "loss": 0.6639, "step": 6013 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5114137834026776e-05, "loss": 0.6078, "step": 6014 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.511242375907803e-05, "loss": 0.6353, "step": 6015 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5110709480748308e-05, "loss": 0.6079, "step": 6016 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5108994999105806e-05, "loss": 0.5447, "step": 6017 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.510728031421873e-05, "loss": 0.6106, "step": 6018 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5105565426155297e-05, "loss": 0.7345, "step": 6019 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5103850334983725e-05, "loss": 0.6234, "step": 6020 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5102135040772244e-05, "loss": 0.7375, "step": 6021 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5100419543589085e-05, "loss": 0.6671, "step": 6022 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5098703843502505e-05, "loss": 0.7182, "step": 6023 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5096987940580747e-05, "loss": 0.5855, "step": 6024 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5095271834892078e-05, "loss": 0.6647, "step": 6025 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5093555526504768e-05, "loss": 0.7592, "step": 6026 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5091839015487093e-05, "loss": 0.6772, "step": 6027 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.509012230190734e-05, "loss": 0.5995, "step": 6028 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5088405385833804e-05, "loss": 0.6622, "step": 6029 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5086688267334779e-05, "loss": 0.6547, "step": 6030 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5084970946478588e-05, "loss": 0.6947, "step": 6031 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5083253423333535e-05, "loss": 0.6396, "step": 6032 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5081535697967954e-05, "loss": 0.6133, "step": 6033 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.507981777045018e-05, "loss": 0.7126, "step": 6034 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5078099640848551e-05, "loss": 0.7525, "step": 6035 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5076381309231422e-05, "loss": 0.7017, "step": 6036 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5074662775667146e-05, "loss": 0.5474, "step": 6037 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5072944040224088e-05, "loss": 0.5775, "step": 6038 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5071225102970633e-05, "loss": 0.5795, "step": 6039 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5069505963975153e-05, "loss": 0.6817, "step": 6040 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.506778662330604e-05, "loss": 0.6745, "step": 6041 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5066067081031696e-05, "loss": 0.6488, "step": 6042 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5064347337220522e-05, "loss": 0.6186, "step": 6043 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5062627391940936e-05, "loss": 0.6756, "step": 6044 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5060907245261359e-05, "loss": 0.6914, "step": 6045 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5059186897250222e-05, "loss": 0.6237, "step": 6046 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5057466347975964e-05, "loss": 0.7296, "step": 6047 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.505574559750703e-05, "loss": 0.647, "step": 6048 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5054024645911875e-05, "loss": 0.5938, "step": 6049 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5052303493258964e-05, "loss": 0.6691, "step": 6050 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5050582139616765e-05, "loss": 0.6994, "step": 6051 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 1.5048860585053752e-05, "loss": 0.7031, "step": 6052 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5047138829638419e-05, "loss": 0.6715, "step": 6053 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5045416873439256e-05, "loss": 0.6959, "step": 6054 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5043694716524769e-05, "loss": 0.6504, "step": 6055 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5041972358963464e-05, "loss": 0.7022, "step": 6056 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5040249800823862e-05, "loss": 0.6549, "step": 6057 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5038527042174486e-05, "loss": 0.5832, "step": 6058 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5036804083083875e-05, "loss": 0.663, "step": 6059 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5035080923620568e-05, "loss": 0.6924, "step": 6060 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5033357563853117e-05, "loss": 0.7325, "step": 6061 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.503163400385008e-05, "loss": 0.5889, "step": 6062 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.502991024368002e-05, "loss": 0.7457, "step": 6063 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5028186283411516e-05, "loss": 0.6813, "step": 6064 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5026462123113147e-05, "loss": 0.6281, "step": 6065 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5024737762853507e-05, "loss": 0.6041, "step": 6066 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5023013202701189e-05, "loss": 0.6116, "step": 6067 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.50212884427248e-05, "loss": 0.7139, "step": 6068 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5019563482992955e-05, "loss": 0.5765, "step": 6069 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5017838323574276e-05, "loss": 0.5923, "step": 6070 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5016112964537391e-05, "loss": 0.5708, "step": 6071 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.501438740595094e-05, "loss": 0.7025, "step": 6072 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5012661647883571e-05, "loss": 0.5639, "step": 6073 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5010935690403932e-05, "loss": 0.6089, "step": 6074 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5009209533580687e-05, "loss": 0.6053, "step": 6075 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5007483177482505e-05, "loss": 0.6559, "step": 6076 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5005756622178064e-05, "loss": 0.6468, "step": 6077 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.500402986773605e-05, "loss": 0.6351, "step": 6078 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.5002302914225153e-05, "loss": 0.7831, "step": 6079 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.500057576171408e-05, "loss": 0.6453, "step": 6080 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4998848410271535e-05, "loss": 0.5942, "step": 6081 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4997120859966236e-05, "loss": 0.6386, "step": 6082 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4995393110866912e-05, "loss": 0.7285, "step": 6083 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.499366516304229e-05, "loss": 0.6436, "step": 6084 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4991937016561113e-05, "loss": 0.64, "step": 6085 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.499020867149213e-05, "loss": 0.6423, "step": 6086 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4988480127904097e-05, "loss": 0.7973, "step": 6087 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.498675138586578e-05, "loss": 0.6942, "step": 6088 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.498502244544595e-05, "loss": 0.6354, "step": 6089 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4983293306713385e-05, "loss": 0.736, "step": 6090 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4981563969736879e-05, "loss": 0.5854, "step": 6091 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.497983443458522e-05, "loss": 0.7003, "step": 6092 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4978104701327221e-05, "loss": 0.6922, "step": 6093 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4976374770031688e-05, "loss": 0.7046, "step": 6094 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4974644640767439e-05, "loss": 0.6258, "step": 6095 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4972914313603307e-05, "loss": 0.7149, "step": 6096 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4971183788608123e-05, "loss": 0.6531, "step": 6097 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.496945306585073e-05, "loss": 0.7189, "step": 6098 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4967722145399986e-05, "loss": 0.6677, "step": 6099 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.496599102732474e-05, "loss": 0.5456, "step": 6100 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4964259711693865e-05, "loss": 0.6011, "step": 6101 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4962528198576233e-05, "loss": 0.639, "step": 6102 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4960796488040731e-05, "loss": 0.6023, "step": 6103 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4959064580156246e-05, "loss": 0.5993, "step": 6104 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4957332474991676e-05, "loss": 0.6539, "step": 6105 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4955600172615927e-05, "loss": 0.8432, "step": 6106 }, { "epoch": 1.1, "grad_norm": 0.0, "learning_rate": 1.4953867673097917e-05, "loss": 0.6375, "step": 6107 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.495213497650656e-05, "loss": 0.6015, "step": 6108 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.495040208291079e-05, "loss": 0.6444, "step": 6109 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.494866899237955e-05, "loss": 0.8319, "step": 6110 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4946935704981775e-05, "loss": 0.6279, "step": 6111 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4945202220786425e-05, "loss": 0.7048, "step": 6112 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.494346853986246e-05, "loss": 0.7277, "step": 6113 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4941734662278847e-05, "loss": 0.6927, "step": 6114 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4940000588104561e-05, "loss": 0.6449, "step": 6115 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4938266317408591e-05, "loss": 0.6493, "step": 6116 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4936531850259927e-05, "loss": 0.722, "step": 6117 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4934797186727573e-05, "loss": 0.5569, "step": 6118 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4933062326880525e-05, "loss": 0.6454, "step": 6119 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4931327270787815e-05, "loss": 0.691, "step": 6120 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4929592018518453e-05, "loss": 0.728, "step": 6121 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4927856570141476e-05, "loss": 0.6669, "step": 6122 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4926120925725927e-05, "loss": 0.6298, "step": 6123 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4924385085340844e-05, "loss": 0.725, "step": 6124 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4922649049055287e-05, "loss": 0.6918, "step": 6125 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4920912816938322e-05, "loss": 0.6679, "step": 6126 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.491917638905901e-05, "loss": 0.599, "step": 6127 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4917439765486436e-05, "loss": 0.6921, "step": 6128 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4915702946289687e-05, "loss": 0.6488, "step": 6129 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.491396593153785e-05, "loss": 0.6706, "step": 6130 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.491222872130003e-05, "loss": 0.6801, "step": 6131 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.491049131564534e-05, "loss": 0.6618, "step": 6132 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4908753714642892e-05, "loss": 0.7172, "step": 6133 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.490701591836181e-05, "loss": 0.6698, "step": 6134 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4905277926871228e-05, "loss": 0.6603, "step": 6135 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4903539740240289e-05, "loss": 0.6529, "step": 6136 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.490180135853814e-05, "loss": 0.549, "step": 6137 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4900062781833932e-05, "loss": 0.6646, "step": 6138 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4898324010196837e-05, "loss": 0.6822, "step": 6139 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4896585043696017e-05, "loss": 0.7356, "step": 6140 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4894845882400659e-05, "loss": 0.6001, "step": 6141 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4893106526379946e-05, "loss": 0.723, "step": 6142 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4891366975703073e-05, "loss": 0.6149, "step": 6143 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4889627230439238e-05, "loss": 0.6993, "step": 6144 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4887887290657661e-05, "loss": 0.5974, "step": 6145 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4886147156427551e-05, "loss": 0.5697, "step": 6146 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4884406827818136e-05, "loss": 0.6063, "step": 6147 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4882666304898655e-05, "loss": 0.5806, "step": 6148 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4880925587738339e-05, "loss": 0.6383, "step": 6149 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4879184676406442e-05, "loss": 0.6812, "step": 6150 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4877443570972223e-05, "loss": 0.5693, "step": 6151 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4875702271504942e-05, "loss": 0.5498, "step": 6152 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.487396077807387e-05, "loss": 0.6274, "step": 6153 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.487221909074829e-05, "loss": 0.6696, "step": 6154 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.487047720959749e-05, "loss": 0.6742, "step": 6155 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4868735134690762e-05, "loss": 0.6132, "step": 6156 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4866992866097407e-05, "loss": 0.6476, "step": 6157 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4865250403886739e-05, "loss": 0.5971, "step": 6158 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4863507748128078e-05, "loss": 0.5438, "step": 6159 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4861764898890748e-05, "loss": 0.7632, "step": 6160 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4860021856244077e-05, "loss": 0.5642, "step": 6161 }, { "epoch": 1.11, "grad_norm": 0.0, "learning_rate": 1.4858278620257412e-05, "loss": 0.7206, "step": 6162 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.48565351910001e-05, "loss": 0.5837, "step": 6163 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4854791568541505e-05, "loss": 0.6371, "step": 6164 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4853047752950977e-05, "loss": 0.7095, "step": 6165 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.48513037442979e-05, "loss": 0.6724, "step": 6166 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4849559542651648e-05, "loss": 0.6652, "step": 6167 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.484781514808161e-05, "loss": 0.7214, "step": 6168 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4846070560657179e-05, "loss": 0.7801, "step": 6169 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4844325780447761e-05, "loss": 0.7467, "step": 6170 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4842580807522766e-05, "loss": 0.6304, "step": 6171 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4840835641951608e-05, "loss": 0.6103, "step": 6172 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4839090283803714e-05, "loss": 0.6333, "step": 6173 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4837344733148522e-05, "loss": 0.6223, "step": 6174 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.483559899005547e-05, "loss": 0.626, "step": 6175 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4833853054594003e-05, "loss": 0.6236, "step": 6176 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4832106926833584e-05, "loss": 0.5252, "step": 6177 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4830360606843672e-05, "loss": 0.6624, "step": 6178 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.482861409469374e-05, "loss": 0.6719, "step": 6179 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.482686739045327e-05, "loss": 0.6226, "step": 6180 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.482512049419174e-05, "loss": 0.6214, "step": 6181 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4823373405978659e-05, "loss": 0.6994, "step": 6182 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4821626125883518e-05, "loss": 0.6678, "step": 6183 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4819878653975826e-05, "loss": 0.5827, "step": 6184 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4818130990325106e-05, "loss": 0.6097, "step": 6185 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4816383135000885e-05, "loss": 0.6793, "step": 6186 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4814635088072688e-05, "loss": 0.6895, "step": 6187 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4812886849610062e-05, "loss": 0.5801, "step": 6188 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4811138419682549e-05, "loss": 0.7047, "step": 6189 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.480938979835971e-05, "loss": 0.6461, "step": 6190 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4807640985711107e-05, "loss": 0.7296, "step": 6191 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4805891981806306e-05, "loss": 0.751, "step": 6192 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4804142786714892e-05, "loss": 0.7274, "step": 6193 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4802393400506447e-05, "loss": 0.6932, "step": 6194 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4800643823250564e-05, "loss": 0.6477, "step": 6195 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4798894055016848e-05, "loss": 0.7167, "step": 6196 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4797144095874907e-05, "loss": 0.5944, "step": 6197 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4795393945894353e-05, "loss": 0.6477, "step": 6198 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4793643605144814e-05, "loss": 0.6499, "step": 6199 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4791893073695918e-05, "loss": 0.7615, "step": 6200 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4790142351617311e-05, "loss": 0.6217, "step": 6201 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4788391438978632e-05, "loss": 0.658, "step": 6202 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4786640335849539e-05, "loss": 0.6573, "step": 6203 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4784889042299693e-05, "loss": 0.6493, "step": 6204 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4783137558398766e-05, "loss": 0.6476, "step": 6205 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4781385884216433e-05, "loss": 0.6575, "step": 6206 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4779634019822378e-05, "loss": 0.6817, "step": 6207 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.477788196528629e-05, "loss": 0.6449, "step": 6208 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4776129720677878e-05, "loss": 0.8082, "step": 6209 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.477437728606684e-05, "loss": 0.6746, "step": 6210 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4772624661522897e-05, "loss": 0.6199, "step": 6211 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4770871847115767e-05, "loss": 0.7286, "step": 6212 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4769118842915183e-05, "loss": 0.6234, "step": 6213 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.476736564899088e-05, "loss": 0.6732, "step": 6214 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4765612265412608e-05, "loss": 0.549, "step": 6215 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4763858692250115e-05, "loss": 0.7309, "step": 6216 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.476210492957316e-05, "loss": 0.735, "step": 6217 }, { "epoch": 1.12, "grad_norm": 0.0, "learning_rate": 1.4760350977451517e-05, "loss": 0.5892, "step": 6218 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4758596835954953e-05, "loss": 0.6631, "step": 6219 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4756842505153257e-05, "loss": 0.7094, "step": 6220 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4755087985116216e-05, "loss": 0.809, "step": 6221 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4753333275913629e-05, "loss": 0.6481, "step": 6222 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4751578377615304e-05, "loss": 0.6855, "step": 6223 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4749823290291048e-05, "loss": 0.6577, "step": 6224 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4748068014010685e-05, "loss": 0.6636, "step": 6225 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4746312548844044e-05, "loss": 0.5877, "step": 6226 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4744556894860958e-05, "loss": 0.7003, "step": 6227 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4742801052131273e-05, "loss": 0.6478, "step": 6228 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4741045020724836e-05, "loss": 0.5707, "step": 6229 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4739288800711502e-05, "loss": 0.6091, "step": 6230 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4737532392161145e-05, "loss": 0.56, "step": 6231 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4735775795143635e-05, "loss": 0.6913, "step": 6232 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4734019009728848e-05, "loss": 0.6282, "step": 6233 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4732262035986675e-05, "loss": 0.5733, "step": 6234 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4730504873987013e-05, "loss": 0.8134, "step": 6235 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4728747523799762e-05, "loss": 0.5836, "step": 6236 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4726989985494834e-05, "loss": 0.7001, "step": 6237 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4725232259142148e-05, "loss": 0.7206, "step": 6238 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4723474344811628e-05, "loss": 0.6285, "step": 6239 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4721716242573204e-05, "loss": 0.5815, "step": 6240 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4719957952496822e-05, "loss": 0.6171, "step": 6241 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4718199474652427e-05, "loss": 0.7661, "step": 6242 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.471644080910997e-05, "loss": 0.6082, "step": 6243 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4714681955939422e-05, "loss": 0.6696, "step": 6244 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4712922915210747e-05, "loss": 0.7598, "step": 6245 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4711163686993925e-05, "loss": 0.7141, "step": 6246 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4709404271358944e-05, "loss": 0.5052, "step": 6247 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.470764466837579e-05, "loss": 0.7302, "step": 6248 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4705884878114468e-05, "loss": 0.6831, "step": 6249 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4704124900644982e-05, "loss": 0.9038, "step": 6250 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4702364736037352e-05, "loss": 0.6975, "step": 6251 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4700604384361593e-05, "loss": 0.7249, "step": 6252 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4698843845687742e-05, "loss": 0.7461, "step": 6253 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4697083120085833e-05, "loss": 0.7335, "step": 6254 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.469532220762591e-05, "loss": 0.7239, "step": 6255 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.469356110837803e-05, "loss": 0.6384, "step": 6256 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4691799822412243e-05, "loss": 0.5599, "step": 6257 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4690038349798626e-05, "loss": 0.6543, "step": 6258 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4688276690607247e-05, "loss": 0.6206, "step": 6259 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4686514844908186e-05, "loss": 0.6078, "step": 6260 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4684752812771541e-05, "loss": 0.6386, "step": 6261 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4682990594267402e-05, "loss": 0.665, "step": 6262 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4681228189465876e-05, "loss": 0.6168, "step": 6263 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.467946559843707e-05, "loss": 0.6987, "step": 6264 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4677702821251108e-05, "loss": 0.6186, "step": 6265 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4675939857978117e-05, "loss": 0.7225, "step": 6266 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4674176708688225e-05, "loss": 0.8047, "step": 6267 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4672413373451577e-05, "loss": 0.7198, "step": 6268 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4670649852338322e-05, "loss": 0.6241, "step": 6269 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4668886145418612e-05, "loss": 0.7172, "step": 6270 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4667122252762616e-05, "loss": 0.7388, "step": 6271 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4665358174440498e-05, "loss": 0.5951, "step": 6272 }, { "epoch": 1.13, "grad_norm": 0.0, "learning_rate": 1.4663593910522443e-05, "loss": 0.6409, "step": 6273 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4661829461078631e-05, "loss": 0.6555, "step": 6274 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4660064826179259e-05, "loss": 0.6158, "step": 6275 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.465830000589452e-05, "loss": 0.6469, "step": 6276 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4656535000294634e-05, "loss": 0.6077, "step": 6277 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4654769809449803e-05, "loss": 0.6563, "step": 6278 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4653004433430255e-05, "loss": 0.672, "step": 6279 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4651238872306222e-05, "loss": 0.6173, "step": 6280 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4649473126147938e-05, "loss": 0.7107, "step": 6281 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.464770719502565e-05, "loss": 0.5829, "step": 6282 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4645941079009604e-05, "loss": 0.6734, "step": 6283 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4644174778170063e-05, "loss": 0.6546, "step": 6284 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4642408292577298e-05, "loss": 0.5839, "step": 6285 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4640641622301575e-05, "loss": 0.634, "step": 6286 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4638874767413179e-05, "loss": 0.6117, "step": 6287 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4637107727982399e-05, "loss": 0.6347, "step": 6288 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4635340504079527e-05, "loss": 0.611, "step": 6289 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4633573095774872e-05, "loss": 0.7666, "step": 6290 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.463180550313874e-05, "loss": 0.6928, "step": 6291 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4630037726241448e-05, "loss": 0.7196, "step": 6292 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4628269765153327e-05, "loss": 0.6493, "step": 6293 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4626501619944704e-05, "loss": 0.6826, "step": 6294 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4624733290685922e-05, "loss": 0.7065, "step": 6295 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4622964777447327e-05, "loss": 0.7846, "step": 6296 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4621196080299273e-05, "loss": 0.6661, "step": 6297 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4619427199312124e-05, "loss": 0.7125, "step": 6298 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4617658134556245e-05, "loss": 0.5803, "step": 6299 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4615888886102017e-05, "loss": 0.5278, "step": 6300 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4614119454019822e-05, "loss": 0.6866, "step": 6301 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4612349838380048e-05, "loss": 0.6609, "step": 6302 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4610580039253096e-05, "loss": 0.6349, "step": 6303 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4608810056709376e-05, "loss": 0.6329, "step": 6304 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4607039890819295e-05, "loss": 0.6843, "step": 6305 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4605269541653273e-05, "loss": 0.6412, "step": 6306 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4603499009281744e-05, "loss": 0.6207, "step": 6307 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.460172829377513e-05, "loss": 0.7032, "step": 6308 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4599957395203887e-05, "loss": 0.682, "step": 6309 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4598186313638455e-05, "loss": 0.5383, "step": 6310 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.45964150491493e-05, "loss": 0.7128, "step": 6311 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4594643601806875e-05, "loss": 0.7334, "step": 6312 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.459287197168166e-05, "loss": 0.5444, "step": 6313 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4591100158844124e-05, "loss": 0.6391, "step": 6314 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4589328163364767e-05, "loss": 0.6094, "step": 6315 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4587555985314069e-05, "loss": 0.5818, "step": 6316 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4585783624762536e-05, "loss": 0.6031, "step": 6317 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4584011081780673e-05, "loss": 0.6942, "step": 6318 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4582238356438997e-05, "loss": 0.6163, "step": 6319 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4580465448808032e-05, "loss": 0.7456, "step": 6320 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4578692358958302e-05, "loss": 0.5812, "step": 6321 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4576919086960346e-05, "loss": 0.6189, "step": 6322 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.457514563288471e-05, "loss": 0.6854, "step": 6323 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.457337199680194e-05, "loss": 0.5759, "step": 6324 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.45715981787826e-05, "loss": 0.6635, "step": 6325 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4569824178897254e-05, "loss": 0.7122, "step": 6326 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4568049997216469e-05, "loss": 0.6347, "step": 6327 }, { "epoch": 1.14, "grad_norm": 0.0, "learning_rate": 1.4566275633810834e-05, "loss": 0.6242, "step": 6328 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4564501088750928e-05, "loss": 0.71, "step": 6329 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4562726362107354e-05, "loss": 0.7197, "step": 6330 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4560951453950707e-05, "loss": 0.6486, "step": 6331 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4559176364351597e-05, "loss": 0.6757, "step": 6332 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4557401093380639e-05, "loss": 0.692, "step": 6333 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4555625641108462e-05, "loss": 0.7087, "step": 6334 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.455385000760569e-05, "loss": 0.6298, "step": 6335 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4552074192942965e-05, "loss": 0.5943, "step": 6336 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4550298197190928e-05, "loss": 0.6803, "step": 6337 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4548522020420236e-05, "loss": 0.7259, "step": 6338 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4546745662701546e-05, "loss": 0.653, "step": 6339 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4544969124105525e-05, "loss": 0.7075, "step": 6340 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4543192404702847e-05, "loss": 0.6454, "step": 6341 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4541415504564191e-05, "loss": 0.5806, "step": 6342 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4539638423760246e-05, "loss": 0.6086, "step": 6343 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4537861162361706e-05, "loss": 0.7177, "step": 6344 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4536083720439279e-05, "loss": 0.7534, "step": 6345 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4534306098063666e-05, "loss": 0.6447, "step": 6346 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4532528295305593e-05, "loss": 0.653, "step": 6347 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4530750312235777e-05, "loss": 0.7198, "step": 6348 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4528972148924956e-05, "loss": 0.7018, "step": 6349 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.452719380544386e-05, "loss": 0.6834, "step": 6350 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4525415281863241e-05, "loss": 0.6574, "step": 6351 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4523636578253848e-05, "loss": 0.632, "step": 6352 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4521857694686445e-05, "loss": 0.6771, "step": 6353 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4520078631231793e-05, "loss": 0.6987, "step": 6354 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4518299387960673e-05, "loss": 0.7225, "step": 6355 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4516519964943863e-05, "loss": 0.6636, "step": 6356 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4514740362252149e-05, "loss": 0.6238, "step": 6357 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4512960579956332e-05, "loss": 0.6725, "step": 6358 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4511180618127209e-05, "loss": 0.6455, "step": 6359 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4509400476835595e-05, "loss": 0.617, "step": 6360 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4507620156152307e-05, "loss": 0.6495, "step": 6361 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4505839656148161e-05, "loss": 0.5893, "step": 6362 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4504058976894e-05, "loss": 0.6893, "step": 6363 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4502278118460658e-05, "loss": 0.6749, "step": 6364 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4500497080918976e-05, "loss": 0.5819, "step": 6365 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4498715864339809e-05, "loss": 0.714, "step": 6366 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4496934468794022e-05, "loss": 0.7704, "step": 6367 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4495152894352479e-05, "loss": 0.5791, "step": 6368 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.449337114108605e-05, "loss": 0.6816, "step": 6369 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4491589209065618e-05, "loss": 0.6409, "step": 6370 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4489807098362074e-05, "loss": 0.6267, "step": 6371 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4488024809046316e-05, "loss": 0.7055, "step": 6372 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4486242341189239e-05, "loss": 0.7064, "step": 6373 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4484459694861758e-05, "loss": 0.5067, "step": 6374 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4482676870134787e-05, "loss": 0.6391, "step": 6375 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.448089386707925e-05, "loss": 0.5723, "step": 6376 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4479110685766083e-05, "loss": 0.6658, "step": 6377 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4477327326266215e-05, "loss": 0.6531, "step": 6378 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4475543788650598e-05, "loss": 0.5562, "step": 6379 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4473760072990182e-05, "loss": 0.6953, "step": 6380 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4471976179355924e-05, "loss": 0.74, "step": 6381 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4470192107818797e-05, "loss": 0.7092, "step": 6382 }, { "epoch": 1.15, "grad_norm": 0.0, "learning_rate": 1.4468407858449768e-05, "loss": 0.6111, "step": 6383 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4466623431319816e-05, "loss": 0.6473, "step": 6384 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4464838826499938e-05, "loss": 0.5327, "step": 6385 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.446305404406112e-05, "loss": 0.6245, "step": 6386 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4461269084074368e-05, "loss": 0.6702, "step": 6387 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4459483946610689e-05, "loss": 0.5597, "step": 6388 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4457698631741096e-05, "loss": 0.6823, "step": 6389 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4455913139536617e-05, "loss": 0.5478, "step": 6390 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4454127470068283e-05, "loss": 0.5706, "step": 6391 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4452341623407123e-05, "loss": 0.5993, "step": 6392 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4450555599624187e-05, "loss": 0.6477, "step": 6393 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4448769398790525e-05, "loss": 0.5833, "step": 6394 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4446983020977197e-05, "loss": 0.622, "step": 6395 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4445196466255265e-05, "loss": 0.6846, "step": 6396 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4443409734695803e-05, "loss": 0.6379, "step": 6397 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.444162282636989e-05, "loss": 0.6263, "step": 6398 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4439835741348612e-05, "loss": 0.6399, "step": 6399 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.443804847970306e-05, "loss": 0.6587, "step": 6400 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4436261041504338e-05, "loss": 0.6734, "step": 6401 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4434473426823556e-05, "loss": 0.6565, "step": 6402 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.443268563573182e-05, "loss": 0.7221, "step": 6403 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4430897668300257e-05, "loss": 0.5959, "step": 6404 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4429109524599993e-05, "loss": 0.6819, "step": 6405 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4427321204702167e-05, "loss": 0.7852, "step": 6406 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4425532708677918e-05, "loss": 0.5211, "step": 6407 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4423744036598395e-05, "loss": 0.6047, "step": 6408 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4421955188534757e-05, "loss": 0.6677, "step": 6409 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.442016616455817e-05, "loss": 0.7098, "step": 6410 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4418376964739795e-05, "loss": 0.6685, "step": 6411 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4416587589150817e-05, "loss": 0.6367, "step": 6412 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.441479803786242e-05, "loss": 0.644, "step": 6413 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4413008310945793e-05, "loss": 0.6414, "step": 6414 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4411218408472136e-05, "loss": 0.5629, "step": 6415 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4409428330512652e-05, "loss": 0.6728, "step": 6416 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.440763807713856e-05, "loss": 0.6861, "step": 6417 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4405847648421072e-05, "loss": 0.7093, "step": 6418 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4404057044431414e-05, "loss": 0.7083, "step": 6419 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4402266265240825e-05, "loss": 0.5924, "step": 6420 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4400475310920544e-05, "loss": 0.674, "step": 6421 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4398684181541813e-05, "loss": 0.6295, "step": 6422 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4396892877175893e-05, "loss": 0.7064, "step": 6423 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.439510139789404e-05, "loss": 0.6577, "step": 6424 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4393309743767525e-05, "loss": 0.659, "step": 6425 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4391517914867622e-05, "loss": 0.5627, "step": 6426 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4389725911265613e-05, "loss": 0.6546, "step": 6427 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4387933733032789e-05, "loss": 0.6077, "step": 6428 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4386141380240444e-05, "loss": 0.7109, "step": 6429 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4384348852959877e-05, "loss": 0.709, "step": 6430 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4382556151262405e-05, "loss": 0.738, "step": 6431 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4380763275219343e-05, "loss": 0.6239, "step": 6432 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4378970224902012e-05, "loss": 0.6562, "step": 6433 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4377177000381743e-05, "loss": 0.7544, "step": 6434 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4375383601729873e-05, "loss": 0.5971, "step": 6435 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4373590029017751e-05, "loss": 0.6092, "step": 6436 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4371796282316726e-05, "loss": 0.665, "step": 6437 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.437000236169815e-05, "loss": 0.7473, "step": 6438 }, { "epoch": 1.16, "grad_norm": 0.0, "learning_rate": 1.4368208267233399e-05, "loss": 0.6887, "step": 6439 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4366413998993839e-05, "loss": 0.6527, "step": 6440 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.436461955705085e-05, "loss": 0.5469, "step": 6441 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4362824941475815e-05, "loss": 0.6563, "step": 6442 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4361030152340135e-05, "loss": 0.6271, "step": 6443 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.43592351897152e-05, "loss": 0.7547, "step": 6444 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4357440053672424e-05, "loss": 0.6644, "step": 6445 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4355644744283215e-05, "loss": 0.6039, "step": 6446 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4353849261619001e-05, "loss": 0.6529, "step": 6447 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4352053605751203e-05, "loss": 0.6753, "step": 6448 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4350257776751255e-05, "loss": 0.5859, "step": 6449 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4348461774690602e-05, "loss": 0.6034, "step": 6450 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.434666559964069e-05, "loss": 0.6167, "step": 6451 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4344869251672974e-05, "loss": 0.6349, "step": 6452 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4343072730858918e-05, "loss": 0.6259, "step": 6453 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4341276037269984e-05, "loss": 0.7155, "step": 6454 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4339479170977657e-05, "loss": 0.7039, "step": 6455 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4337682132053415e-05, "loss": 0.6045, "step": 6456 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4335884920568743e-05, "loss": 0.6042, "step": 6457 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4334087536595143e-05, "loss": 0.6268, "step": 6458 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4332289980204114e-05, "loss": 0.6621, "step": 6459 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4330492251467171e-05, "loss": 0.6224, "step": 6460 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4328694350455827e-05, "loss": 0.6729, "step": 6461 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4326896277241604e-05, "loss": 0.6726, "step": 6462 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.432509803189604e-05, "loss": 0.6738, "step": 6463 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4323299614490663e-05, "loss": 0.6605, "step": 6464 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.432150102509702e-05, "loss": 0.7086, "step": 6465 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4319702263786668e-05, "loss": 0.75, "step": 6466 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4317903330631156e-05, "loss": 0.631, "step": 6467 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4316104225702052e-05, "loss": 0.6213, "step": 6468 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.431430494907093e-05, "loss": 0.7388, "step": 6469 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4312505500809364e-05, "loss": 0.6987, "step": 6470 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4310705880988942e-05, "loss": 0.659, "step": 6471 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4308906089681257e-05, "loss": 0.6471, "step": 6472 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4307106126957906e-05, "loss": 0.6692, "step": 6473 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4305305992890495e-05, "loss": 0.6441, "step": 6474 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4303505687550636e-05, "loss": 0.6762, "step": 6475 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4301705211009947e-05, "loss": 0.6388, "step": 6476 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4299904563340054e-05, "loss": 0.5195, "step": 6477 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4298103744612598e-05, "loss": 0.5758, "step": 6478 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4296302754899205e-05, "loss": 0.5638, "step": 6479 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4294501594271532e-05, "loss": 0.6434, "step": 6480 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4292700262801226e-05, "loss": 0.6838, "step": 6481 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4290898760559952e-05, "loss": 0.6229, "step": 6482 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4289097087619377e-05, "loss": 0.6866, "step": 6483 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4287295244051167e-05, "loss": 0.5653, "step": 6484 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4285493229927012e-05, "loss": 0.6873, "step": 6485 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4283691045318591e-05, "loss": 0.6438, "step": 6486 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4281888690297605e-05, "loss": 0.6883, "step": 6487 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4280086164935749e-05, "loss": 0.6598, "step": 6488 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4278283469304737e-05, "loss": 0.6774, "step": 6489 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4276480603476275e-05, "loss": 0.64, "step": 6490 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4274677567522091e-05, "loss": 0.5701, "step": 6491 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4272874361513908e-05, "loss": 0.5691, "step": 6492 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4271070985523467e-05, "loss": 0.6528, "step": 6493 }, { "epoch": 1.17, "grad_norm": 0.0, "learning_rate": 1.4269267439622506e-05, "loss": 0.5998, "step": 6494 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4267463723882768e-05, "loss": 0.6417, "step": 6495 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4265659838376014e-05, "loss": 0.5506, "step": 6496 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4263855783174006e-05, "loss": 0.5473, "step": 6497 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.426205155834851e-05, "loss": 0.6066, "step": 6498 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.42602471639713e-05, "loss": 0.6574, "step": 6499 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4258442600114161e-05, "loss": 0.6852, "step": 6500 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4256637866848881e-05, "loss": 0.7241, "step": 6501 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4254832964247252e-05, "loss": 0.6031, "step": 6502 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.425302789238108e-05, "loss": 0.5964, "step": 6503 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4251222651322173e-05, "loss": 0.57, "step": 6504 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4249417241142344e-05, "loss": 0.6745, "step": 6505 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4247611661913421e-05, "loss": 0.6964, "step": 6506 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4245805913707229e-05, "loss": 0.6428, "step": 6507 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4243999996595603e-05, "loss": 0.637, "step": 6508 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4242193910650388e-05, "loss": 0.7353, "step": 6509 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.424038765594343e-05, "loss": 0.5896, "step": 6510 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4238581232546586e-05, "loss": 0.6619, "step": 6511 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4236774640531725e-05, "loss": 0.5618, "step": 6512 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4234967879970708e-05, "loss": 0.6328, "step": 6513 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.423316095093541e-05, "loss": 0.6349, "step": 6514 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.423135385349772e-05, "loss": 0.728, "step": 6515 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4229546587729529e-05, "loss": 0.6956, "step": 6516 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4227739153702725e-05, "loss": 0.6784, "step": 6517 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4225931551489213e-05, "loss": 0.7216, "step": 6518 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.422412378116091e-05, "loss": 0.6627, "step": 6519 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4222315842789724e-05, "loss": 0.7503, "step": 6520 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.422050773644758e-05, "loss": 0.5487, "step": 6521 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4218699462206407e-05, "loss": 0.6869, "step": 6522 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4216891020138145e-05, "loss": 0.6436, "step": 6523 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4215082410314733e-05, "loss": 0.4937, "step": 6524 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.421327363280812e-05, "loss": 0.5783, "step": 6525 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4211464687690269e-05, "loss": 0.5852, "step": 6526 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4209655575033135e-05, "loss": 0.6542, "step": 6527 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4207846294908692e-05, "loss": 0.643, "step": 6528 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4206036847388914e-05, "loss": 0.6027, "step": 6529 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4204227232545781e-05, "loss": 0.6925, "step": 6530 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4202417450451294e-05, "loss": 0.6885, "step": 6531 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4200607501177435e-05, "loss": 0.7014, "step": 6532 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4198797384796216e-05, "loss": 0.7626, "step": 6533 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4196987101379645e-05, "loss": 0.7214, "step": 6534 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4195176650999738e-05, "loss": 0.5655, "step": 6535 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4193366033728516e-05, "loss": 0.5569, "step": 6536 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.419155524963801e-05, "loss": 0.6629, "step": 6537 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4189744298800255e-05, "loss": 0.7121, "step": 6538 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4187933181287294e-05, "loss": 0.607, "step": 6539 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4186121897171177e-05, "loss": 0.6012, "step": 6540 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4184310446523958e-05, "loss": 0.7355, "step": 6541 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4182498829417706e-05, "loss": 0.6194, "step": 6542 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4180687045924479e-05, "loss": 0.7185, "step": 6543 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4178875096116365e-05, "loss": 0.6254, "step": 6544 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.417706298006544e-05, "loss": 0.6321, "step": 6545 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4175250697843792e-05, "loss": 0.71, "step": 6546 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.417343824952352e-05, "loss": 0.616, "step": 6547 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4171625635176724e-05, "loss": 0.6112, "step": 6548 }, { "epoch": 1.18, "grad_norm": 0.0, "learning_rate": 1.4169812854875512e-05, "loss": 0.5542, "step": 6549 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4167999908692007e-05, "loss": 0.5723, "step": 6550 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4166186796698321e-05, "loss": 0.6446, "step": 6551 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4164373518966588e-05, "loss": 0.6638, "step": 6552 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4162560075568945e-05, "loss": 0.5779, "step": 6553 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4160746466577529e-05, "loss": 0.6312, "step": 6554 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4158932692064489e-05, "loss": 0.702, "step": 6555 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4157118752101983e-05, "loss": 0.6285, "step": 6556 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4155304646762174e-05, "loss": 0.6233, "step": 6557 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4153490376117227e-05, "loss": 0.6417, "step": 6558 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4151675940239316e-05, "loss": 0.5477, "step": 6559 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4149861339200622e-05, "loss": 0.6252, "step": 6560 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4148046573073339e-05, "loss": 0.7247, "step": 6561 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4146231641929653e-05, "loss": 0.6778, "step": 6562 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4144416545841772e-05, "loss": 0.74, "step": 6563 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4142601284881904e-05, "loss": 0.6351, "step": 6564 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4140785859122255e-05, "loss": 0.5663, "step": 6565 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4138970268635055e-05, "loss": 0.6872, "step": 6566 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4137154513492526e-05, "loss": 0.7388, "step": 6567 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.41353385937669e-05, "loss": 0.7095, "step": 6568 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4133522509530424e-05, "loss": 0.6956, "step": 6569 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.413170626085534e-05, "loss": 0.6246, "step": 6570 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4129889847813903e-05, "loss": 0.6416, "step": 6571 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4128073270478375e-05, "loss": 0.6479, "step": 6572 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4126256528921018e-05, "loss": 0.588, "step": 6573 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4124439623214108e-05, "loss": 0.6426, "step": 6574 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4122622553429926e-05, "loss": 0.6837, "step": 6575 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4120805319640755e-05, "loss": 0.5652, "step": 6576 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4118987921918889e-05, "loss": 0.5983, "step": 6577 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4117170360336625e-05, "loss": 0.6688, "step": 6578 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4115352634966274e-05, "loss": 0.6029, "step": 6579 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4113534745880147e-05, "loss": 0.6971, "step": 6580 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4111716693150557e-05, "loss": 0.5622, "step": 6581 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4109898476849835e-05, "loss": 0.6418, "step": 6582 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.410808009705031e-05, "loss": 0.6504, "step": 6583 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4106261553824323e-05, "loss": 0.6189, "step": 6584 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4104442847244216e-05, "loss": 0.5756, "step": 6585 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4102623977382342e-05, "loss": 0.6358, "step": 6586 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4100804944311052e-05, "loss": 0.6993, "step": 6587 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4098985748102725e-05, "loss": 0.6864, "step": 6588 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4097166388829719e-05, "loss": 0.6304, "step": 6589 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4095346866564415e-05, "loss": 0.6164, "step": 6590 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.40935271813792e-05, "loss": 0.7625, "step": 6591 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4091707333346455e-05, "loss": 0.637, "step": 6592 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.408988732253859e-05, "loss": 0.6508, "step": 6593 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4088067149027994e-05, "loss": 0.68, "step": 6594 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4086246812887087e-05, "loss": 0.6651, "step": 6595 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4084426314188284e-05, "loss": 0.7447, "step": 6596 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4082605653004e-05, "loss": 0.6077, "step": 6597 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4080784829406673e-05, "loss": 0.579, "step": 6598 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4078963843468737e-05, "loss": 0.6264, "step": 6599 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4077142695262625e-05, "loss": 0.635, "step": 6600 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4075321384860793e-05, "loss": 0.6315, "step": 6601 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.40734999123357e-05, "loss": 0.6557, "step": 6602 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4071678277759799e-05, "loss": 0.6525, "step": 6603 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4069856481205562e-05, "loss": 0.6484, "step": 6604 }, { "epoch": 1.19, "grad_norm": 0.0, "learning_rate": 1.4068034522745461e-05, "loss": 0.7092, "step": 6605 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.406621240245198e-05, "loss": 0.626, "step": 6606 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4064390120397602e-05, "loss": 0.9009, "step": 6607 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4062567676654819e-05, "loss": 0.595, "step": 6608 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4060745071296138e-05, "loss": 0.6359, "step": 6609 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4058922304394065e-05, "loss": 0.5685, "step": 6610 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4057099376021104e-05, "loss": 0.6805, "step": 6611 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4055276286249782e-05, "loss": 0.6583, "step": 6612 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.405345303515262e-05, "loss": 0.6891, "step": 6613 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4051629622802154e-05, "loss": 0.665, "step": 6614 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4049806049270923e-05, "loss": 0.7064, "step": 6615 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4047982314631465e-05, "loss": 0.5346, "step": 6616 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4046158418956337e-05, "loss": 0.6012, "step": 6617 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.40443343623181e-05, "loss": 0.7361, "step": 6618 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.404251014478931e-05, "loss": 0.656, "step": 6619 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4040685766442541e-05, "loss": 0.5911, "step": 6620 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.403886122735037e-05, "loss": 0.6639, "step": 6621 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.403703652758538e-05, "loss": 0.5258, "step": 6622 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4035211667220166e-05, "loss": 0.6849, "step": 6623 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4033386646327313e-05, "loss": 0.6173, "step": 6624 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4031561464979436e-05, "loss": 0.6033, "step": 6625 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4029736123249134e-05, "loss": 0.5687, "step": 6626 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4027910621209023e-05, "loss": 0.6271, "step": 6627 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.402608495893173e-05, "loss": 0.6549, "step": 6628 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.402425913648988e-05, "loss": 0.6777, "step": 6629 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4022433153956107e-05, "loss": 0.6645, "step": 6630 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4020607011403056e-05, "loss": 0.6128, "step": 6631 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4018780708903365e-05, "loss": 0.5895, "step": 6632 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4016954246529697e-05, "loss": 0.6187, "step": 6633 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4015127624354705e-05, "loss": 0.6578, "step": 6634 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4013300842451058e-05, "loss": 0.7163, "step": 6635 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4011473900891428e-05, "loss": 0.7576, "step": 6636 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.40096467997485e-05, "loss": 0.6779, "step": 6637 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4007819539094945e-05, "loss": 0.6717, "step": 6638 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4005992119003468e-05, "loss": 0.7358, "step": 6639 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.400416453954676e-05, "loss": 0.5905, "step": 6640 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4002336800797527e-05, "loss": 0.6714, "step": 6641 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.4000508902828482e-05, "loss": 0.6434, "step": 6642 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3998680845712335e-05, "loss": 0.6405, "step": 6643 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.399685262952182e-05, "loss": 0.6408, "step": 6644 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3995024254329657e-05, "loss": 0.6818, "step": 6645 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3993195720208583e-05, "loss": 0.6308, "step": 6646 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3991367027231345e-05, "loss": 0.7337, "step": 6647 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.398953817547069e-05, "loss": 0.5515, "step": 6648 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.398770916499937e-05, "loss": 0.7243, "step": 6649 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.398587999589015e-05, "loss": 0.5843, "step": 6650 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3984050668215793e-05, "loss": 0.7014, "step": 6651 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3982221182049078e-05, "loss": 0.6396, "step": 6652 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3980391537462783e-05, "loss": 0.7228, "step": 6653 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.397856173452969e-05, "loss": 0.7608, "step": 6654 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3976731773322598e-05, "loss": 0.5936, "step": 6655 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3974901653914306e-05, "loss": 0.616, "step": 6656 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3973071376377612e-05, "loss": 0.6537, "step": 6657 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.3971240940785336e-05, "loss": 0.5834, "step": 6658 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.396941034721029e-05, "loss": 0.6746, "step": 6659 }, { "epoch": 1.2, "grad_norm": 0.0, "learning_rate": 1.39675795957253e-05, "loss": 0.7117, "step": 6660 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.39657486864032e-05, "loss": 0.6061, "step": 6661 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.396391761931682e-05, "loss": 0.6394, "step": 6662 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3962086394539009e-05, "loss": 0.7464, "step": 6663 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3960255012142613e-05, "loss": 0.7714, "step": 6664 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3958423472200488e-05, "loss": 0.6308, "step": 6665 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3956591774785496e-05, "loss": 0.621, "step": 6666 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3954759919970506e-05, "loss": 0.5092, "step": 6667 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.395292790782839e-05, "loss": 0.6587, "step": 6668 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3951095738432031e-05, "loss": 0.7937, "step": 6669 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3949263411854315e-05, "loss": 0.6835, "step": 6670 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3947430928168138e-05, "loss": 0.5896, "step": 6671 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3945598287446394e-05, "loss": 0.6922, "step": 6672 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3943765489761986e-05, "loss": 0.6744, "step": 6673 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3941932535187836e-05, "loss": 0.5875, "step": 6674 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.394009942379686e-05, "loss": 0.6544, "step": 6675 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3938266155661973e-05, "loss": 0.6349, "step": 6676 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3936432730856112e-05, "loss": 0.5973, "step": 6677 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3934599149452217e-05, "loss": 0.6744, "step": 6678 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3932765411523226e-05, "loss": 0.6171, "step": 6679 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.393093151714209e-05, "loss": 0.6341, "step": 6680 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3929097466381762e-05, "loss": 0.5261, "step": 6681 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3927263259315212e-05, "loss": 0.601, "step": 6682 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3925428896015396e-05, "loss": 0.6636, "step": 6683 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3923594376555294e-05, "loss": 0.5386, "step": 6684 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3921759701007886e-05, "loss": 0.6357, "step": 6685 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3919924869446162e-05, "loss": 0.6219, "step": 6686 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3918089881943108e-05, "loss": 0.6614, "step": 6687 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.391625473857173e-05, "loss": 0.6782, "step": 6688 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3914419439405027e-05, "loss": 0.6057, "step": 6689 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3912583984516015e-05, "loss": 0.635, "step": 6690 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.391074837397771e-05, "loss": 0.7708, "step": 6691 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.390891260786313e-05, "loss": 0.7012, "step": 6692 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3907076686245313e-05, "loss": 0.633, "step": 6693 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3905240609197295e-05, "loss": 0.7239, "step": 6694 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.390340437679211e-05, "loss": 0.5455, "step": 6695 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3901567989102818e-05, "loss": 0.6545, "step": 6696 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3899731446202465e-05, "loss": 0.6591, "step": 6697 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3897894748164114e-05, "loss": 0.5946, "step": 6698 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3896057895060833e-05, "loss": 0.7455, "step": 6699 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3894220886965692e-05, "loss": 0.6887, "step": 6700 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3892383723951777e-05, "loss": 0.6177, "step": 6701 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3890546406092168e-05, "loss": 0.6065, "step": 6702 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3888708933459957e-05, "loss": 0.7044, "step": 6703 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3886871306128244e-05, "loss": 0.6936, "step": 6704 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3885033524170131e-05, "loss": 0.6341, "step": 6705 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.388319558765873e-05, "loss": 0.6729, "step": 6706 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3881357496667157e-05, "loss": 0.6634, "step": 6707 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3879519251268531e-05, "loss": 0.6638, "step": 6708 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3877680851535986e-05, "loss": 0.5535, "step": 6709 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3875842297542654e-05, "loss": 0.6001, "step": 6710 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.387400358936167e-05, "loss": 0.7429, "step": 6711 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3872164727066193e-05, "loss": 0.5549, "step": 6712 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3870325710729366e-05, "loss": 0.5752, "step": 6713 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3868486540424355e-05, "loss": 0.6731, "step": 6714 }, { "epoch": 1.21, "grad_norm": 0.0, "learning_rate": 1.3866647216224322e-05, "loss": 0.712, "step": 6715 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3864807738202436e-05, "loss": 0.5681, "step": 6716 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3862968106431882e-05, "loss": 0.621, "step": 6717 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3861128320985837e-05, "loss": 0.5455, "step": 6718 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.385928838193749e-05, "loss": 0.6346, "step": 6719 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3857448289360045e-05, "loss": 0.608, "step": 6720 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3855608043326696e-05, "loss": 0.5837, "step": 6721 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3853767643910651e-05, "loss": 0.6265, "step": 6722 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.385192709118513e-05, "loss": 0.6769, "step": 6723 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.385008638522335e-05, "loss": 0.6133, "step": 6724 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.384824552609854e-05, "loss": 0.6562, "step": 6725 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3846404513883931e-05, "loss": 0.6235, "step": 6726 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3844563348652757e-05, "loss": 0.5538, "step": 6727 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3842722030478272e-05, "loss": 0.6617, "step": 6728 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.384088055943372e-05, "loss": 0.6221, "step": 6729 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3839038935592358e-05, "loss": 0.5632, "step": 6730 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3837197159027453e-05, "loss": 0.6667, "step": 6731 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3835355229812266e-05, "loss": 0.642, "step": 6732 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3833513148020083e-05, "loss": 0.646, "step": 6733 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3831670913724177e-05, "loss": 0.7353, "step": 6734 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3829828526997841e-05, "loss": 0.7054, "step": 6735 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3827985987914363e-05, "loss": 0.6112, "step": 6736 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3826143296547045e-05, "loss": 0.5832, "step": 6737 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.382430045296919e-05, "loss": 0.7196, "step": 6738 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3822457457254119e-05, "loss": 0.5373, "step": 6739 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3820614309475135e-05, "loss": 0.6693, "step": 6740 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3818771009705572e-05, "loss": 0.592, "step": 6741 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3816927558018753e-05, "loss": 0.6034, "step": 6742 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.381508395448802e-05, "loss": 0.5897, "step": 6743 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.381324019918671e-05, "loss": 0.5877, "step": 6744 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3811396292188174e-05, "loss": 0.5836, "step": 6745 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3809552233565762e-05, "loss": 0.6205, "step": 6746 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3807708023392841e-05, "loss": 0.6176, "step": 6747 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3805863661742767e-05, "loss": 0.5972, "step": 6748 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3804019148688916e-05, "loss": 0.6508, "step": 6749 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3802174484304671e-05, "loss": 0.6737, "step": 6750 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3800329668663408e-05, "loss": 0.6821, "step": 6751 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3798484701838522e-05, "loss": 0.6539, "step": 6752 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3796639583903408e-05, "loss": 0.6399, "step": 6753 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3794794314931465e-05, "loss": 0.6205, "step": 6754 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3792948894996106e-05, "loss": 0.5929, "step": 6755 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.379110332417074e-05, "loss": 0.6017, "step": 6756 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3789257602528789e-05, "loss": 0.6223, "step": 6757 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3787411730143683e-05, "loss": 0.6443, "step": 6758 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3785565707088845e-05, "loss": 0.5625, "step": 6759 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.378371953343772e-05, "loss": 0.5578, "step": 6760 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3781873209263754e-05, "loss": 0.5909, "step": 6761 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3780026734640387e-05, "loss": 0.7724, "step": 6762 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3778180109641086e-05, "loss": 0.6239, "step": 6763 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3776333334339308e-05, "loss": 0.6421, "step": 6764 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3774486408808514e-05, "loss": 0.6361, "step": 6765 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3772639333122192e-05, "loss": 0.7059, "step": 6766 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3770792107353811e-05, "loss": 0.713, "step": 6767 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.376894473157686e-05, "loss": 0.5942, "step": 6768 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3767097205864835e-05, "loss": 0.5648, "step": 6769 }, { "epoch": 1.22, "grad_norm": 0.0, "learning_rate": 1.3765249530291223e-05, "loss": 0.6405, "step": 6770 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3763401704929537e-05, "loss": 0.7286, "step": 6771 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3761553729853288e-05, "loss": 0.5959, "step": 6772 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3759705605135984e-05, "loss": 0.6418, "step": 6773 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.375785733085115e-05, "loss": 0.6326, "step": 6774 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3756008907072315e-05, "loss": 0.8417, "step": 6775 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.375416033387301e-05, "loss": 0.7454, "step": 6776 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3752311611326779e-05, "loss": 0.6359, "step": 6777 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.375046273950716e-05, "loss": 0.6055, "step": 6778 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3748613718487711e-05, "loss": 0.6098, "step": 6779 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3746764548341986e-05, "loss": 0.5893, "step": 6780 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.374491522914355e-05, "loss": 0.7484, "step": 6781 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3743065760965968e-05, "loss": 0.7106, "step": 6782 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3741216143882819e-05, "loss": 0.6777, "step": 6783 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3739366377967685e-05, "loss": 0.5919, "step": 6784 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3737516463294148e-05, "loss": 0.7381, "step": 6785 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3735666399935801e-05, "loss": 0.5544, "step": 6786 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.373381618796625e-05, "loss": 0.5599, "step": 6787 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3731965827459092e-05, "loss": 0.6438, "step": 6788 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.373011531848794e-05, "loss": 0.6797, "step": 6789 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3728264661126412e-05, "loss": 0.5902, "step": 6790 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3726413855448128e-05, "loss": 0.7547, "step": 6791 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3724562901526717e-05, "loss": 0.6767, "step": 6792 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3722711799435815e-05, "loss": 0.7147, "step": 6793 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3720860549249058e-05, "loss": 0.5759, "step": 6794 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3719009151040093e-05, "loss": 0.5892, "step": 6795 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3717157604882576e-05, "loss": 0.6302, "step": 6796 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.371530591085016e-05, "loss": 0.659, "step": 6797 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.371345406901651e-05, "loss": 0.6153, "step": 6798 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3711602079455297e-05, "loss": 0.6249, "step": 6799 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3709749942240195e-05, "loss": 0.7109, "step": 6800 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3707897657444885e-05, "loss": 0.6038, "step": 6801 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3706045225143053e-05, "loss": 0.7573, "step": 6802 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3704192645408394e-05, "loss": 0.7323, "step": 6803 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3702339918314606e-05, "loss": 0.6664, "step": 6804 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.370048704393539e-05, "loss": 0.6268, "step": 6805 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3698634022344466e-05, "loss": 0.6457, "step": 6806 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3696780853615543e-05, "loss": 0.6753, "step": 6807 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3694927537822342e-05, "loss": 0.7393, "step": 6808 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3693074075038598e-05, "loss": 0.6823, "step": 6809 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3691220465338036e-05, "loss": 0.5655, "step": 6810 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3689366708794402e-05, "loss": 0.7675, "step": 6811 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3687512805481443e-05, "loss": 0.6008, "step": 6812 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3685658755472904e-05, "loss": 0.6265, "step": 6813 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3683804558842545e-05, "loss": 0.6852, "step": 6814 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3681950215664135e-05, "loss": 0.5554, "step": 6815 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3680095726011434e-05, "loss": 0.693, "step": 6816 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.367824108995822e-05, "loss": 0.7521, "step": 6817 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3676386307578278e-05, "loss": 0.6193, "step": 6818 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3674531378945389e-05, "loss": 0.5931, "step": 6819 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3672676304133346e-05, "loss": 0.6803, "step": 6820 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3670821083215948e-05, "loss": 0.5841, "step": 6821 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3668965716267e-05, "loss": 0.6018, "step": 6822 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3667110203360309e-05, "loss": 0.6697, "step": 6823 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3665254544569691e-05, "loss": 0.6527, "step": 6824 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3663398739968972e-05, "loss": 0.6239, "step": 6825 }, { "epoch": 1.23, "grad_norm": 0.0, "learning_rate": 1.3661542789631973e-05, "loss": 0.658, "step": 6826 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.365968669363253e-05, "loss": 0.6634, "step": 6827 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3657830452044481e-05, "loss": 0.5845, "step": 6828 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3655974064941669e-05, "loss": 0.6254, "step": 6829 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3654117532397946e-05, "loss": 0.6518, "step": 6830 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.365226085448717e-05, "loss": 0.6562, "step": 6831 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3650404031283198e-05, "loss": 0.5476, "step": 6832 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.36485470628599e-05, "loss": 0.6695, "step": 6833 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3646689949291151e-05, "loss": 0.7264, "step": 6834 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3644832690650829e-05, "loss": 0.6271, "step": 6835 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3642975287012816e-05, "loss": 0.6696, "step": 6836 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3641117738451008e-05, "loss": 0.6653, "step": 6837 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.36392600450393e-05, "loss": 0.6288, "step": 6838 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3637402206851591e-05, "loss": 0.593, "step": 6839 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3635544223961792e-05, "loss": 0.5785, "step": 6840 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3633686096443813e-05, "loss": 0.5883, "step": 6841 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.363182782437158e-05, "loss": 0.5667, "step": 6842 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3629969407819008e-05, "loss": 0.5854, "step": 6843 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.362811084686004e-05, "loss": 0.6718, "step": 6844 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3626252141568608e-05, "loss": 0.7166, "step": 6845 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3624393292018651e-05, "loss": 0.7265, "step": 6846 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.362253429828412e-05, "loss": 0.61, "step": 6847 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3620675160438972e-05, "loss": 0.6612, "step": 6848 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.361881587855716e-05, "loss": 0.6634, "step": 6849 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3616956452712656e-05, "loss": 0.6244, "step": 6850 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3615096882979423e-05, "loss": 0.6399, "step": 6851 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3613237169431446e-05, "loss": 0.6471, "step": 6852 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3611377312142709e-05, "loss": 0.715, "step": 6853 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3609517311187188e-05, "loss": 0.5824, "step": 6854 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.360765716663889e-05, "loss": 0.5898, "step": 6855 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.360579687857181e-05, "loss": 0.5841, "step": 6856 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3603936447059951e-05, "loss": 0.6236, "step": 6857 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3602075872177327e-05, "loss": 0.5809, "step": 6858 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3600215153997954e-05, "loss": 0.6622, "step": 6859 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3598354292595852e-05, "loss": 0.6354, "step": 6860 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3596493288045057e-05, "loss": 0.6788, "step": 6861 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3594632140419592e-05, "loss": 0.7488, "step": 6862 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3592770849793507e-05, "loss": 0.6275, "step": 6863 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3590909416240842e-05, "loss": 0.6942, "step": 6864 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3589047839835647e-05, "loss": 0.5962, "step": 6865 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3587186120651982e-05, "loss": 0.7559, "step": 6866 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.358532425876391e-05, "loss": 0.6524, "step": 6867 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3583462254245495e-05, "loss": 0.7004, "step": 6868 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3581600107170814e-05, "loss": 0.6393, "step": 6869 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.357973781761394e-05, "loss": 0.5298, "step": 6870 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3577875385648969e-05, "loss": 0.632, "step": 6871 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3576012811349983e-05, "loss": 0.6248, "step": 6872 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3574150094791084e-05, "loss": 0.7041, "step": 6873 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.357228723604637e-05, "loss": 0.7767, "step": 6874 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3570424235189947e-05, "loss": 0.6954, "step": 6875 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3568561092295936e-05, "loss": 0.8325, "step": 6876 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.356669780743845e-05, "loss": 0.7254, "step": 6877 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3564834380691612e-05, "loss": 0.7032, "step": 6878 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3562970812129559e-05, "loss": 0.6503, "step": 6879 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3561107101826424e-05, "loss": 0.775, "step": 6880 }, { "epoch": 1.24, "grad_norm": 0.0, "learning_rate": 1.3559243249856344e-05, "loss": 0.6701, "step": 6881 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3557379256293473e-05, "loss": 0.6524, "step": 6882 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.355551512121196e-05, "loss": 0.684, "step": 6883 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3553650844685967e-05, "loss": 0.5472, "step": 6884 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3551786426789652e-05, "loss": 0.7535, "step": 6885 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.354992186759719e-05, "loss": 0.6471, "step": 6886 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3548057167182758e-05, "loss": 0.7767, "step": 6887 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3546192325620531e-05, "loss": 0.7214, "step": 6888 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3544327342984695e-05, "loss": 0.5025, "step": 6889 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.354246221934945e-05, "loss": 0.5924, "step": 6890 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3540596954788988e-05, "loss": 0.5961, "step": 6891 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3538731549377512e-05, "loss": 0.6251, "step": 6892 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3536866003189234e-05, "loss": 0.7147, "step": 6893 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.353500031629837e-05, "loss": 0.6415, "step": 6894 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3533134488779136e-05, "loss": 0.5573, "step": 6895 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.353126852070576e-05, "loss": 0.6252, "step": 6896 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3529402412152468e-05, "loss": 0.6795, "step": 6897 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3527536163193509e-05, "loss": 0.6134, "step": 6898 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3525669773903114e-05, "loss": 0.6644, "step": 6899 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3523803244355535e-05, "loss": 0.5439, "step": 6900 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3521936574625029e-05, "loss": 0.7359, "step": 6901 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3520069764785853e-05, "loss": 0.5326, "step": 6902 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.351820281491227e-05, "loss": 0.5976, "step": 6903 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3516335725078552e-05, "loss": 0.7676, "step": 6904 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3514468495358974e-05, "loss": 0.7166, "step": 6905 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3512601125827823e-05, "loss": 0.6548, "step": 6906 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3510733616559376e-05, "loss": 0.7587, "step": 6907 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3508865967627933e-05, "loss": 0.6099, "step": 6908 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3506998179107796e-05, "loss": 0.5617, "step": 6909 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.350513025107326e-05, "loss": 0.7643, "step": 6910 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3503262183598636e-05, "loss": 0.6315, "step": 6911 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3501393976758242e-05, "loss": 0.6256, "step": 6912 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3499525630626397e-05, "loss": 0.6565, "step": 6913 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.349765714527743e-05, "loss": 0.5861, "step": 6914 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3495788520785666e-05, "loss": 0.6211, "step": 6915 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3493919757225449e-05, "loss": 0.6036, "step": 6916 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.349205085467112e-05, "loss": 0.6099, "step": 6917 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3490181813197023e-05, "loss": 0.5324, "step": 6918 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3488312632877514e-05, "loss": 0.656, "step": 6919 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3486443313786955e-05, "loss": 0.5794, "step": 6920 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3484573855999705e-05, "loss": 0.6157, "step": 6921 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.348270425959014e-05, "loss": 0.7269, "step": 6922 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3480834524632634e-05, "loss": 0.6914, "step": 6923 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3478964651201567e-05, "loss": 0.6534, "step": 6924 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3477094639371326e-05, "loss": 0.5906, "step": 6925 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3475224489216303e-05, "loss": 0.7352, "step": 6926 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3473354200810898e-05, "loss": 0.7371, "step": 6927 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3471483774229512e-05, "loss": 0.6515, "step": 6928 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3469613209546555e-05, "loss": 0.5839, "step": 6929 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3467742506836438e-05, "loss": 0.6165, "step": 6930 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3465871666173586e-05, "loss": 0.6215, "step": 6931 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3464000687632421e-05, "loss": 0.7034, "step": 6932 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3462129571287377e-05, "loss": 0.5766, "step": 6933 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3460258317212884e-05, "loss": 0.6064, "step": 6934 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3458386925483389e-05, "loss": 0.6369, "step": 6935 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 1.3456515396173337e-05, "loss": 0.6271, "step": 6936 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3454643729357185e-05, "loss": 0.7494, "step": 6937 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.345277192510938e-05, "loss": 0.6088, "step": 6938 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3450899983504398e-05, "loss": 0.7156, "step": 6939 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3449027904616703e-05, "loss": 0.7663, "step": 6940 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3447155688520768e-05, "loss": 0.5986, "step": 6941 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3445283335291075e-05, "loss": 0.6349, "step": 6942 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.344341084500211e-05, "loss": 0.6284, "step": 6943 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3441538217728361e-05, "loss": 0.6465, "step": 6944 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3439665453544324e-05, "loss": 0.5627, "step": 6945 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3437792552524504e-05, "loss": 0.724, "step": 6946 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3435919514743411e-05, "loss": 0.5475, "step": 6947 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3434046340275549e-05, "loss": 0.618, "step": 6948 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3432173029195443e-05, "loss": 0.5843, "step": 6949 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3430299581577612e-05, "loss": 0.5896, "step": 6950 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.342842599749659e-05, "loss": 0.6567, "step": 6951 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3426552277026908e-05, "loss": 0.6192, "step": 6952 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3424678420243105e-05, "loss": 0.6756, "step": 6953 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3422804427219726e-05, "loss": 0.596, "step": 6954 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.342093029803133e-05, "loss": 0.7196, "step": 6955 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3419056032752461e-05, "loss": 0.6076, "step": 6956 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3417181631457686e-05, "loss": 0.683, "step": 6957 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3415307094221573e-05, "loss": 0.7583, "step": 6958 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3413432421118693e-05, "loss": 0.7036, "step": 6959 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3411557612223625e-05, "loss": 0.6212, "step": 6960 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3409682667610948e-05, "loss": 0.6483, "step": 6961 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3407807587355254e-05, "loss": 0.744, "step": 6962 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3405932371531138e-05, "loss": 0.5874, "step": 6963 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.34040570202132e-05, "loss": 0.5901, "step": 6964 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3402181533476036e-05, "loss": 0.6325, "step": 6965 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3400305911394266e-05, "loss": 0.702, "step": 6966 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3398430154042502e-05, "loss": 0.7004, "step": 6967 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3396554261495363e-05, "loss": 0.6246, "step": 6968 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3394678233827476e-05, "loss": 0.6614, "step": 6969 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3392802071113475e-05, "loss": 0.7446, "step": 6970 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3390925773427996e-05, "loss": 0.5318, "step": 6971 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3389049340845681e-05, "loss": 0.7039, "step": 6972 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3387172773441174e-05, "loss": 0.5476, "step": 6973 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3385296071289138e-05, "loss": 0.6976, "step": 6974 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.338341923446422e-05, "loss": 0.6602, "step": 6975 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3381542263041088e-05, "loss": 0.6538, "step": 6976 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3379665157094414e-05, "loss": 0.5926, "step": 6977 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.337778791669887e-05, "loss": 0.5963, "step": 6978 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3375910541929136e-05, "loss": 0.6609, "step": 6979 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3374033032859899e-05, "loss": 0.6503, "step": 6980 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3372155389565845e-05, "loss": 0.5723, "step": 6981 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3370277612121677e-05, "loss": 0.6856, "step": 6982 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3368399700602089e-05, "loss": 0.674, "step": 6983 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3366521655081787e-05, "loss": 0.6784, "step": 6984 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3364643475635495e-05, "loss": 0.745, "step": 6985 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3362765162337914e-05, "loss": 0.7474, "step": 6986 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3360886715263778e-05, "loss": 0.678, "step": 6987 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.335900813448781e-05, "loss": 0.5804, "step": 6988 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3357129420084744e-05, "loss": 0.6162, "step": 6989 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.335525057212932e-05, "loss": 0.634, "step": 6990 }, { "epoch": 1.26, "grad_norm": 0.0, "learning_rate": 1.3353371590696278e-05, "loss": 0.5828, "step": 6991 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3351492475860368e-05, "loss": 0.6554, "step": 6992 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3349613227696352e-05, "loss": 0.694, "step": 6993 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3347733846278979e-05, "loss": 0.6603, "step": 6994 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.334585433168302e-05, "loss": 0.7062, "step": 6995 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3343974683983244e-05, "loss": 0.7172, "step": 6996 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3342094903254423e-05, "loss": 0.6993, "step": 6997 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3340214989571345e-05, "loss": 0.5995, "step": 6998 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3338334943008792e-05, "loss": 0.6504, "step": 6999 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3336454763641557e-05, "loss": 0.7019, "step": 7000 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3334574451544435e-05, "loss": 0.6746, "step": 7001 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3332694006792229e-05, "loss": 0.7201, "step": 7002 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3330813429459743e-05, "loss": 0.6993, "step": 7003 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3328932719621799e-05, "loss": 0.6808, "step": 7004 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3327051877353206e-05, "loss": 0.6169, "step": 7005 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3325170902728788e-05, "loss": 0.626, "step": 7006 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3323289795823375e-05, "loss": 0.792, "step": 7007 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3321408556711803e-05, "loss": 0.7149, "step": 7008 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3319527185468908e-05, "loss": 0.6283, "step": 7009 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3317645682169535e-05, "loss": 0.66, "step": 7010 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3315764046888532e-05, "loss": 0.5395, "step": 7011 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3313882279700759e-05, "loss": 0.7142, "step": 7012 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3312000380681068e-05, "loss": 0.6065, "step": 7013 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3310118349904329e-05, "loss": 0.6959, "step": 7014 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3308236187445414e-05, "loss": 0.5567, "step": 7015 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3306353893379192e-05, "loss": 0.591, "step": 7016 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3304471467780549e-05, "loss": 0.7171, "step": 7017 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3302588910724369e-05, "loss": 0.5952, "step": 7018 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.330070622228555e-05, "loss": 0.5509, "step": 7019 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3298823402538978e-05, "loss": 0.6561, "step": 7020 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3296940451559562e-05, "loss": 0.682, "step": 7021 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3295057369422203e-05, "loss": 0.6365, "step": 7022 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3293174156201824e-05, "loss": 0.7656, "step": 7023 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3291290811973329e-05, "loss": 0.6065, "step": 7024 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3289407336811651e-05, "loss": 0.6267, "step": 7025 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3287523730791713e-05, "loss": 0.7503, "step": 7026 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3285639993988448e-05, "loss": 0.6747, "step": 7027 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3283756126476797e-05, "loss": 0.5493, "step": 7028 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3281872128331703e-05, "loss": 0.7072, "step": 7029 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.327998799962811e-05, "loss": 0.5392, "step": 7030 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.327810374044098e-05, "loss": 0.7835, "step": 7031 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3276219350845264e-05, "loss": 0.6443, "step": 7032 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3274334830915932e-05, "loss": 0.6278, "step": 7033 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3272450180727949e-05, "loss": 0.8636, "step": 7034 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3270565400356293e-05, "loss": 0.6267, "step": 7035 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3268680489875943e-05, "loss": 0.5915, "step": 7036 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3266795449361883e-05, "loss": 0.6461, "step": 7037 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3264910278889103e-05, "loss": 0.6623, "step": 7038 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3263024978532599e-05, "loss": 0.6446, "step": 7039 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3261139548367372e-05, "loss": 0.6456, "step": 7040 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3259253988468425e-05, "loss": 0.6098, "step": 7041 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3257368298910778e-05, "loss": 0.6152, "step": 7042 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3255482479769432e-05, "loss": 0.6442, "step": 7043 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.325359653111942e-05, "loss": 0.6431, "step": 7044 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3251710453035763e-05, "loss": 0.647, "step": 7045 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.3249824245593495e-05, "loss": 0.6842, "step": 7046 }, { "epoch": 1.27, "grad_norm": 0.0, "learning_rate": 1.324793790886765e-05, "loss": 0.6786, "step": 7047 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3246051442933272e-05, "loss": 0.646, "step": 7048 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3244164847865405e-05, "loss": 0.6062, "step": 7049 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3242278123739105e-05, "loss": 0.6571, "step": 7050 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3240391270629425e-05, "loss": 0.5868, "step": 7051 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.323850428861143e-05, "loss": 0.7247, "step": 7052 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3236617177760187e-05, "loss": 0.6494, "step": 7053 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3234729938150765e-05, "loss": 0.642, "step": 7054 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3232842569858249e-05, "loss": 0.6063, "step": 7055 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3230955072957714e-05, "loss": 0.679, "step": 7056 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3229067447524254e-05, "loss": 0.6596, "step": 7057 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3227179693632958e-05, "loss": 0.6486, "step": 7058 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3225291811358925e-05, "loss": 0.6734, "step": 7059 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3223403800777257e-05, "loss": 0.8083, "step": 7060 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3221515661963068e-05, "loss": 0.7003, "step": 7061 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3219627394991464e-05, "loss": 0.5799, "step": 7062 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3217738999937567e-05, "loss": 0.8397, "step": 7063 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3215850476876502e-05, "loss": 0.7382, "step": 7064 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3213961825883394e-05, "loss": 0.6067, "step": 7065 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3212073047033381e-05, "loss": 0.5886, "step": 7066 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3210184140401597e-05, "loss": 0.6671, "step": 7067 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3208295106063189e-05, "loss": 0.5787, "step": 7068 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3206405944093307e-05, "loss": 0.6902, "step": 7069 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3204516654567101e-05, "loss": 0.6591, "step": 7070 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3202627237559735e-05, "loss": 0.6069, "step": 7071 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3200737693146375e-05, "loss": 0.6952, "step": 7072 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3198848021402179e-05, "loss": 0.6429, "step": 7073 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3196958222402332e-05, "loss": 0.601, "step": 7074 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3195068296222012e-05, "loss": 0.6654, "step": 7075 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3193178242936399e-05, "loss": 0.5862, "step": 7076 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3191288062620689e-05, "loss": 0.6035, "step": 7077 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3189397755350067e-05, "loss": 0.6486, "step": 7078 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3187507321199742e-05, "loss": 0.6437, "step": 7079 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3185616760244916e-05, "loss": 0.7365, "step": 7080 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3183726072560796e-05, "loss": 0.6473, "step": 7081 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3181835258222599e-05, "loss": 0.6732, "step": 7082 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3179944317305546e-05, "loss": 0.6885, "step": 7083 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3178053249884862e-05, "loss": 0.6858, "step": 7084 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3176162056035773e-05, "loss": 0.6352, "step": 7085 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3174270735833515e-05, "loss": 0.6433, "step": 7086 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3172379289353331e-05, "loss": 0.6201, "step": 7087 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3170487716670467e-05, "loss": 0.6136, "step": 7088 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3168596017860164e-05, "loss": 0.6575, "step": 7089 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3166704192997688e-05, "loss": 0.6656, "step": 7090 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3164812242158294e-05, "loss": 0.6409, "step": 7091 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3162920165417246e-05, "loss": 0.5842, "step": 7092 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3161027962849816e-05, "loss": 0.67, "step": 7093 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.315913563453128e-05, "loss": 0.6484, "step": 7094 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3157243180536913e-05, "loss": 0.6438, "step": 7095 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3155350600942008e-05, "loss": 0.6211, "step": 7096 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3153457895821846e-05, "loss": 0.7922, "step": 7097 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.315156506525173e-05, "loss": 0.628, "step": 7098 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3149672109306956e-05, "loss": 0.6265, "step": 7099 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.314777902806283e-05, "loss": 0.6591, "step": 7100 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3145885821594662e-05, "loss": 0.6509, "step": 7101 }, { "epoch": 1.28, "grad_norm": 0.0, "learning_rate": 1.3143992489977767e-05, "loss": 0.69, "step": 7102 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3142099033287463e-05, "loss": 0.6918, "step": 7103 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3140205451599077e-05, "loss": 0.624, "step": 7104 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3138311744987936e-05, "loss": 0.7001, "step": 7105 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3136417913529384e-05, "loss": 0.6657, "step": 7106 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3134523957298755e-05, "loss": 0.6135, "step": 7107 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3132629876371388e-05, "loss": 0.5625, "step": 7108 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3130735670822639e-05, "loss": 0.6972, "step": 7109 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3128841340727862e-05, "loss": 0.619, "step": 7110 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3126946886162417e-05, "loss": 0.7855, "step": 7111 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3125052307201668e-05, "loss": 0.6302, "step": 7112 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3123157603920987e-05, "loss": 0.6682, "step": 7113 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3121262776395743e-05, "loss": 0.6301, "step": 7114 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3119367824701322e-05, "loss": 0.6566, "step": 7115 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3117472748913099e-05, "loss": 0.6573, "step": 7116 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3115577549106475e-05, "loss": 0.5937, "step": 7117 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3113682225356838e-05, "loss": 0.661, "step": 7118 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3111786777739586e-05, "loss": 0.5826, "step": 7119 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3109891206330128e-05, "loss": 0.6564, "step": 7120 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3107995511203867e-05, "loss": 0.6785, "step": 7121 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3106099692436223e-05, "loss": 0.627, "step": 7122 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3104203750102614e-05, "loss": 0.6716, "step": 7123 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3102307684278454e-05, "loss": 0.6534, "step": 7124 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.310041149503919e-05, "loss": 0.6531, "step": 7125 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3098515182460239e-05, "loss": 0.7654, "step": 7126 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3096618746617048e-05, "loss": 0.5995, "step": 7127 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3094722187585055e-05, "loss": 0.6574, "step": 7128 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3092825505439718e-05, "loss": 0.7326, "step": 7129 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.309092870025648e-05, "loss": 0.6036, "step": 7130 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3089031772110802e-05, "loss": 0.834, "step": 7131 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.308713472107815e-05, "loss": 0.6202, "step": 7132 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3085237547233993e-05, "loss": 0.5911, "step": 7133 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.30833402506538e-05, "loss": 0.6523, "step": 7134 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3081442831413044e-05, "loss": 0.547, "step": 7135 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.307954528958722e-05, "loss": 0.6278, "step": 7136 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3077647625251809e-05, "loss": 0.6325, "step": 7137 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3075749838482299e-05, "loss": 0.616, "step": 7138 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3073851929354193e-05, "loss": 0.6784, "step": 7139 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3071953897942993e-05, "loss": 0.7899, "step": 7140 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3070055744324201e-05, "loss": 0.6966, "step": 7141 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3068157468573336e-05, "loss": 0.6735, "step": 7142 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3066259070765908e-05, "loss": 0.6428, "step": 7143 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3064360550977445e-05, "loss": 0.6783, "step": 7144 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3062461909283466e-05, "loss": 0.6115, "step": 7145 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3060563145759507e-05, "loss": 0.7449, "step": 7146 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3058664260481102e-05, "loss": 0.6218, "step": 7147 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3056765253523796e-05, "loss": 0.6607, "step": 7148 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3054866124963128e-05, "loss": 0.63, "step": 7149 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3052966874874655e-05, "loss": 0.5994, "step": 7150 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3051067503333927e-05, "loss": 0.6241, "step": 7151 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.304916801041651e-05, "loss": 0.5894, "step": 7152 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3047268396197966e-05, "loss": 0.6806, "step": 7153 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3045368660753861e-05, "loss": 0.6705, "step": 7154 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3043468804159777e-05, "loss": 0.6853, "step": 7155 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3041568826491292e-05, "loss": 0.5832, "step": 7156 }, { "epoch": 1.29, "grad_norm": 0.0, "learning_rate": 1.3039668727823984e-05, "loss": 0.5675, "step": 7157 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3037768508233451e-05, "loss": 0.6914, "step": 7158 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.303586816779528e-05, "loss": 0.5694, "step": 7159 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3033967706585073e-05, "loss": 0.6239, "step": 7160 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3032067124678433e-05, "loss": 0.6614, "step": 7161 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3030166422150966e-05, "loss": 0.624, "step": 7162 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3028265599078295e-05, "loss": 0.5982, "step": 7163 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3026364655536026e-05, "loss": 0.6495, "step": 7164 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3024463591599785e-05, "loss": 0.6029, "step": 7165 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3022562407345204e-05, "loss": 0.6434, "step": 7166 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3020661102847912e-05, "loss": 0.6057, "step": 7167 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3018759678183547e-05, "loss": 0.6816, "step": 7168 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.301685813342775e-05, "loss": 0.6924, "step": 7169 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3014956468656165e-05, "loss": 0.6408, "step": 7170 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3013054683944453e-05, "loss": 0.592, "step": 7171 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3011152779368261e-05, "loss": 0.7254, "step": 7172 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.300925075500325e-05, "loss": 0.606, "step": 7173 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3007348610925096e-05, "loss": 0.5663, "step": 7174 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3005446347209456e-05, "loss": 0.6771, "step": 7175 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3003543963932015e-05, "loss": 0.6195, "step": 7176 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.3001641461168448e-05, "loss": 0.6655, "step": 7177 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2999738838994445e-05, "loss": 0.5967, "step": 7178 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2997836097485687e-05, "loss": 0.5839, "step": 7179 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2995933236717878e-05, "loss": 0.6364, "step": 7180 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2994030256766708e-05, "loss": 0.6713, "step": 7181 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.299212715770789e-05, "loss": 0.595, "step": 7182 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2990223939617126e-05, "loss": 0.7006, "step": 7183 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2988320602570128e-05, "loss": 0.6519, "step": 7184 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2986417146642621e-05, "loss": 0.677, "step": 7185 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2984513571910322e-05, "loss": 0.6366, "step": 7186 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2982609878448962e-05, "loss": 0.6368, "step": 7187 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2980706066334273e-05, "loss": 0.6018, "step": 7188 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2978802135641987e-05, "loss": 0.6892, "step": 7189 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2976898086447851e-05, "loss": 0.6562, "step": 7190 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2974993918827607e-05, "loss": 0.6013, "step": 7191 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.297308963285701e-05, "loss": 0.6424, "step": 7192 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2971185228611815e-05, "loss": 0.5468, "step": 7193 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.296928070616778e-05, "loss": 0.6297, "step": 7194 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2967376065600674e-05, "loss": 0.5332, "step": 7195 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2965471306986264e-05, "loss": 0.698, "step": 7196 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2963566430400323e-05, "loss": 0.5637, "step": 7197 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2961661435918635e-05, "loss": 0.6806, "step": 7198 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2959756323616981e-05, "loss": 0.5952, "step": 7199 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.295785109357115e-05, "loss": 0.6662, "step": 7200 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2955945745856937e-05, "loss": 0.6599, "step": 7201 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2954040280550136e-05, "loss": 0.6338, "step": 7202 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.295213469772655e-05, "loss": 0.6445, "step": 7203 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2950228997461994e-05, "loss": 0.6528, "step": 7204 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2948323179832271e-05, "loss": 0.7517, "step": 7205 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2946417244913204e-05, "loss": 0.6793, "step": 7206 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2944511192780609e-05, "loss": 0.6799, "step": 7207 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.294260502351031e-05, "loss": 0.6925, "step": 7208 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2940698737178152e-05, "loss": 0.7195, "step": 7209 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2938792333859956e-05, "loss": 0.6209, "step": 7210 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2936885813631564e-05, "loss": 0.572, "step": 7211 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2934979176568827e-05, "loss": 0.5431, "step": 7212 }, { "epoch": 1.3, "grad_norm": 0.0, "learning_rate": 1.2933072422747588e-05, "loss": 0.6589, "step": 7213 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2931165552243704e-05, "loss": 0.5801, "step": 7214 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2929258565133034e-05, "loss": 0.6945, "step": 7215 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.292735146149144e-05, "loss": 0.5543, "step": 7216 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2925444241394788e-05, "loss": 0.6746, "step": 7217 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2923536904918955e-05, "loss": 0.674, "step": 7218 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2921629452139812e-05, "loss": 0.6286, "step": 7219 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.291972188313325e-05, "loss": 0.6313, "step": 7220 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2917814197975144e-05, "loss": 0.6267, "step": 7221 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2915906396741394e-05, "loss": 0.6406, "step": 7222 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2913998479507892e-05, "loss": 0.6146, "step": 7223 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2912090446350538e-05, "loss": 0.5786, "step": 7224 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2910182297345237e-05, "loss": 0.6921, "step": 7225 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.29082740325679e-05, "loss": 0.591, "step": 7226 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2906365652094438e-05, "loss": 0.6974, "step": 7227 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2904457156000776e-05, "loss": 0.7323, "step": 7228 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2902548544362827e-05, "loss": 0.6668, "step": 7229 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2900639817256525e-05, "loss": 0.6645, "step": 7230 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2898730974757809e-05, "loss": 0.6497, "step": 7231 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.28968220169426e-05, "loss": 0.6917, "step": 7232 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2894912943886855e-05, "loss": 0.5831, "step": 7233 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2893003755666512e-05, "loss": 0.6334, "step": 7234 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2891094452357521e-05, "loss": 0.644, "step": 7235 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2889185034035843e-05, "loss": 0.5397, "step": 7236 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2887275500777435e-05, "loss": 0.6688, "step": 7237 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2885365852658259e-05, "loss": 0.6705, "step": 7238 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2883456089754289e-05, "loss": 0.726, "step": 7239 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2881546212141493e-05, "loss": 0.5914, "step": 7240 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2879636219895855e-05, "loss": 0.6355, "step": 7241 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2877726113093354e-05, "loss": 0.6816, "step": 7242 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2875815891809978e-05, "loss": 0.6572, "step": 7243 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2873905556121722e-05, "loss": 0.6279, "step": 7244 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2871995106104578e-05, "loss": 0.5736, "step": 7245 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2870084541834547e-05, "loss": 0.7232, "step": 7246 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2868173863387638e-05, "loss": 0.5868, "step": 7247 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2866263070839857e-05, "loss": 0.7353, "step": 7248 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2864352164267224e-05, "loss": 0.6974, "step": 7249 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2862441143745756e-05, "loss": 0.6461, "step": 7250 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2860530009351471e-05, "loss": 0.7149, "step": 7251 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2858618761160403e-05, "loss": 0.6817, "step": 7252 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2856707399248588e-05, "loss": 0.6341, "step": 7253 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2854795923692054e-05, "loss": 0.5869, "step": 7254 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2852884334566848e-05, "loss": 0.6195, "step": 7255 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.285097263194902e-05, "loss": 0.6259, "step": 7256 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2849060815914611e-05, "loss": 0.7051, "step": 7257 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2847148886539686e-05, "loss": 0.691, "step": 7258 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.28452368439003e-05, "loss": 0.6712, "step": 7259 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2843324688072519e-05, "loss": 0.6449, "step": 7260 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2841412419132409e-05, "loss": 0.7585, "step": 7261 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2839500037156047e-05, "loss": 0.7567, "step": 7262 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2837587542219507e-05, "loss": 0.5594, "step": 7263 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2835674934398877e-05, "loss": 0.5421, "step": 7264 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2833762213770238e-05, "loss": 0.6366, "step": 7265 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2831849380409685e-05, "loss": 0.7886, "step": 7266 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.282993643439331e-05, "loss": 0.6819, "step": 7267 }, { "epoch": 1.31, "grad_norm": 0.0, "learning_rate": 1.2828023375797217e-05, "loss": 0.6029, "step": 7268 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.282611020469751e-05, "loss": 0.6454, "step": 7269 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2824196921170296e-05, "loss": 0.64, "step": 7270 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2822283525291691e-05, "loss": 0.6571, "step": 7271 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2820370017137816e-05, "loss": 0.7022, "step": 7272 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2818456396784787e-05, "loss": 0.5871, "step": 7273 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2816542664308733e-05, "loss": 0.6905, "step": 7274 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.281462881978579e-05, "loss": 0.5269, "step": 7275 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2812714863292085e-05, "loss": 0.5342, "step": 7276 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2810800794903773e-05, "loss": 0.6519, "step": 7277 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2808886614696982e-05, "loss": 0.5892, "step": 7278 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2806972322747876e-05, "loss": 0.5964, "step": 7279 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2805057919132601e-05, "loss": 0.6735, "step": 7280 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2803143403927314e-05, "loss": 0.6807, "step": 7281 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2801228777208181e-05, "loss": 0.6679, "step": 7282 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.279931403905137e-05, "loss": 0.6037, "step": 7283 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2797399189533049e-05, "loss": 0.7643, "step": 7284 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2795484228729398e-05, "loss": 0.6307, "step": 7285 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2793569156716592e-05, "loss": 0.7346, "step": 7286 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2791653973570823e-05, "loss": 0.7153, "step": 7287 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2789738679368277e-05, "loss": 0.6039, "step": 7288 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2787823274185145e-05, "loss": 0.7384, "step": 7289 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2785907758097627e-05, "loss": 0.686, "step": 7290 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2783992131181929e-05, "loss": 0.725, "step": 7291 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2782076393514252e-05, "loss": 0.6112, "step": 7292 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2780160545170812e-05, "loss": 0.6442, "step": 7293 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.277824458622782e-05, "loss": 0.7134, "step": 7294 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2776328516761498e-05, "loss": 0.5957, "step": 7295 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2774412336848076e-05, "loss": 0.6656, "step": 7296 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2772496046563775e-05, "loss": 0.6186, "step": 7297 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2770579645984836e-05, "loss": 0.6632, "step": 7298 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2768663135187489e-05, "loss": 0.6566, "step": 7299 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.276674651424798e-05, "loss": 0.6646, "step": 7300 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2764829783242557e-05, "loss": 0.6248, "step": 7301 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2762912942247466e-05, "loss": 0.6329, "step": 7302 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2760995991338967e-05, "loss": 0.6494, "step": 7303 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2759078930593316e-05, "loss": 0.687, "step": 7304 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2757161760086776e-05, "loss": 0.7348, "step": 7305 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2755244479895623e-05, "loss": 0.6748, "step": 7306 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2753327090096124e-05, "loss": 0.7371, "step": 7307 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2751409590764555e-05, "loss": 0.6996, "step": 7308 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2749491981977197e-05, "loss": 0.6402, "step": 7309 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2747574263810341e-05, "loss": 0.5973, "step": 7310 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2745656436340275e-05, "loss": 0.6554, "step": 7311 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.274373849964329e-05, "loss": 0.6214, "step": 7312 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2741820453795686e-05, "loss": 0.7297, "step": 7313 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2739902298873768e-05, "loss": 0.7092, "step": 7314 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2737984034953845e-05, "loss": 0.6878, "step": 7315 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2736065662112225e-05, "loss": 0.572, "step": 7316 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2734147180425227e-05, "loss": 0.6974, "step": 7317 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.273222858996917e-05, "loss": 0.6547, "step": 7318 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2730309890820379e-05, "loss": 0.6534, "step": 7319 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2728391083055182e-05, "loss": 0.6252, "step": 7320 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2726472166749914e-05, "loss": 0.5778, "step": 7321 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2724553141980916e-05, "loss": 0.6347, "step": 7322 }, { "epoch": 1.32, "grad_norm": 0.0, "learning_rate": 1.2722634008824524e-05, "loss": 0.5772, "step": 7323 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2720714767357083e-05, "loss": 0.6079, "step": 7324 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2718795417654952e-05, "loss": 0.6065, "step": 7325 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2716875959794486e-05, "loss": 0.6174, "step": 7326 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2714956393852032e-05, "loss": 0.6515, "step": 7327 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2713036719903965e-05, "loss": 0.6697, "step": 7328 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.271111693802665e-05, "loss": 0.5583, "step": 7329 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.270919704829646e-05, "loss": 0.7314, "step": 7330 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2707277050789767e-05, "loss": 0.6747, "step": 7331 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2705356945582956e-05, "loss": 0.6032, "step": 7332 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2703436732752415e-05, "loss": 0.7309, "step": 7333 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2701516412374527e-05, "loss": 0.6133, "step": 7334 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2699595984525686e-05, "loss": 0.6522, "step": 7335 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2697675449282291e-05, "loss": 0.6466, "step": 7336 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.269575480672075e-05, "loss": 0.5883, "step": 7337 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2693834056917463e-05, "loss": 0.6412, "step": 7338 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2691913199948843e-05, "loss": 0.6889, "step": 7339 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.26899922358913e-05, "loss": 0.6254, "step": 7340 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2688071164821265e-05, "loss": 0.5762, "step": 7341 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2686149986815148e-05, "loss": 0.6656, "step": 7342 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2684228701949386e-05, "loss": 0.5891, "step": 7343 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2682307310300406e-05, "loss": 0.6601, "step": 7344 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2680385811944648e-05, "loss": 0.7046, "step": 7345 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2678464206958549e-05, "loss": 0.6875, "step": 7346 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2676542495418557e-05, "loss": 0.5799, "step": 7347 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.267462067740112e-05, "loss": 0.7049, "step": 7348 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2672698752982689e-05, "loss": 0.6112, "step": 7349 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2670776722239724e-05, "loss": 0.5886, "step": 7350 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2668854585248683e-05, "loss": 0.6946, "step": 7351 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2666932342086038e-05, "loss": 0.5946, "step": 7352 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2665009992828254e-05, "loss": 0.6427, "step": 7353 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2663087537551806e-05, "loss": 0.7074, "step": 7354 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2661164976333175e-05, "loss": 0.6166, "step": 7355 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2659242309248843e-05, "loss": 0.6445, "step": 7356 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2657319536375296e-05, "loss": 0.5471, "step": 7357 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2655396657789026e-05, "loss": 0.6546, "step": 7358 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2653473673566525e-05, "loss": 0.5862, "step": 7359 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.26515505837843e-05, "loss": 0.5577, "step": 7360 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2649627388518848e-05, "loss": 0.6752, "step": 7361 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.264770408784668e-05, "loss": 0.6696, "step": 7362 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2645780681844306e-05, "loss": 0.5777, "step": 7363 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2643857170588245e-05, "loss": 0.6391, "step": 7364 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2641933554155018e-05, "loss": 0.7759, "step": 7365 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2640009832621148e-05, "loss": 0.6459, "step": 7366 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2638086006063164e-05, "loss": 0.5173, "step": 7367 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2636162074557599e-05, "loss": 0.6769, "step": 7368 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.263423803818099e-05, "loss": 0.6378, "step": 7369 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2632313897009878e-05, "loss": 0.6904, "step": 7370 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2630389651120813e-05, "loss": 0.6821, "step": 7371 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2628465300590343e-05, "loss": 0.6541, "step": 7372 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2626540845495015e-05, "loss": 0.6378, "step": 7373 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2624616285911395e-05, "loss": 0.5621, "step": 7374 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2622691621916043e-05, "loss": 0.6596, "step": 7375 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2620766853585526e-05, "loss": 0.7545, "step": 7376 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2618841980996414e-05, "loss": 0.6649, "step": 7377 }, { "epoch": 1.33, "grad_norm": 0.0, "learning_rate": 1.2616917004225279e-05, "loss": 0.5765, "step": 7378 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2614991923348709e-05, "loss": 0.6382, "step": 7379 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2613066738443276e-05, "loss": 0.6474, "step": 7380 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2611141449585568e-05, "loss": 0.5548, "step": 7381 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2609216056852187e-05, "loss": 0.6849, "step": 7382 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2607290560319718e-05, "loss": 0.5634, "step": 7383 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2605364960064765e-05, "loss": 0.6811, "step": 7384 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.260343925616393e-05, "loss": 0.7494, "step": 7385 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.260151344869382e-05, "loss": 0.6184, "step": 7386 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2599587537731053e-05, "loss": 0.6805, "step": 7387 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2597661523352235e-05, "loss": 0.6068, "step": 7388 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2595735405633993e-05, "loss": 0.5498, "step": 7389 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2593809184652955e-05, "loss": 0.6884, "step": 7390 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2591882860485738e-05, "loss": 0.5252, "step": 7391 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2589956433208981e-05, "loss": 0.6928, "step": 7392 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2588029902899324e-05, "loss": 0.6051, "step": 7393 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.25861032696334e-05, "loss": 0.6169, "step": 7394 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2584176533487859e-05, "loss": 0.6902, "step": 7395 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.258224969453935e-05, "loss": 0.5813, "step": 7396 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2580322752864522e-05, "loss": 0.5845, "step": 7397 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.257839570854004e-05, "loss": 0.6182, "step": 7398 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2576468561642555e-05, "loss": 0.5908, "step": 7399 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2574541312248735e-05, "loss": 0.6068, "step": 7400 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2572613960435254e-05, "loss": 0.655, "step": 7401 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2570686506278782e-05, "loss": 0.5774, "step": 7402 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2568758949855999e-05, "loss": 0.5759, "step": 7403 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2566831291243583e-05, "loss": 0.6458, "step": 7404 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.256490353051822e-05, "loss": 0.5812, "step": 7405 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2562975667756603e-05, "loss": 0.6824, "step": 7406 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.256104770303542e-05, "loss": 0.6322, "step": 7407 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2559119636431375e-05, "loss": 0.6705, "step": 7408 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2557191468021166e-05, "loss": 0.7315, "step": 7409 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2555263197881498e-05, "loss": 0.648, "step": 7410 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2553334826089084e-05, "loss": 0.7071, "step": 7411 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2551406352720636e-05, "loss": 0.6223, "step": 7412 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2549477777852874e-05, "loss": 0.7534, "step": 7413 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2547549101562517e-05, "loss": 0.6772, "step": 7414 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2545620323926292e-05, "loss": 0.5702, "step": 7415 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.254369144502093e-05, "loss": 0.5342, "step": 7416 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2541762464923166e-05, "loss": 0.6394, "step": 7417 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2539833383709739e-05, "loss": 0.5963, "step": 7418 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2537904201457383e-05, "loss": 0.7397, "step": 7419 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2535974918242855e-05, "loss": 0.6714, "step": 7420 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2534045534142899e-05, "loss": 0.7266, "step": 7421 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2532116049234268e-05, "loss": 0.6455, "step": 7422 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2530186463593728e-05, "loss": 0.6883, "step": 7423 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.252825677729803e-05, "loss": 0.5999, "step": 7424 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2526326990423952e-05, "loss": 0.6822, "step": 7425 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2524397103048256e-05, "loss": 0.5766, "step": 7426 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2522467115247717e-05, "loss": 0.6838, "step": 7427 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2520537027099118e-05, "loss": 0.6845, "step": 7428 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2518606838679234e-05, "loss": 0.743, "step": 7429 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2516676550064859e-05, "loss": 0.6628, "step": 7430 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2514746161332778e-05, "loss": 0.6539, "step": 7431 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2512815672559789e-05, "loss": 0.5888, "step": 7432 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2510885083822685e-05, "loss": 0.5772, "step": 7433 }, { "epoch": 1.34, "grad_norm": 0.0, "learning_rate": 1.2508954395198272e-05, "loss": 0.5811, "step": 7434 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2507023606763355e-05, "loss": 0.6645, "step": 7435 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2505092718594748e-05, "loss": 0.5675, "step": 7436 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2503161730769255e-05, "loss": 0.6404, "step": 7437 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2501230643363703e-05, "loss": 0.7394, "step": 7438 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2499299456454911e-05, "loss": 0.6856, "step": 7439 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2497368170119704e-05, "loss": 0.5856, "step": 7440 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2495436784434913e-05, "loss": 0.6676, "step": 7441 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2493505299477374e-05, "loss": 0.6344, "step": 7442 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2491573715323922e-05, "loss": 0.5612, "step": 7443 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.24896420320514e-05, "loss": 0.6821, "step": 7444 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2487710249736651e-05, "loss": 0.7539, "step": 7445 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2485778368456525e-05, "loss": 0.7083, "step": 7446 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2483846388287882e-05, "loss": 0.7165, "step": 7447 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2481914309307569e-05, "loss": 0.7211, "step": 7448 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2479982131592457e-05, "loss": 0.5772, "step": 7449 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2478049855219404e-05, "loss": 0.6734, "step": 7450 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2476117480265286e-05, "loss": 0.4681, "step": 7451 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2474185006806974e-05, "loss": 0.6486, "step": 7452 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2472252434921341e-05, "loss": 0.6539, "step": 7453 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2470319764685268e-05, "loss": 0.6715, "step": 7454 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.246838699617565e-05, "loss": 0.7733, "step": 7455 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2466454129469363e-05, "loss": 0.7021, "step": 7456 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2464521164643308e-05, "loss": 0.7088, "step": 7457 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2462588101774382e-05, "loss": 0.6609, "step": 7458 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2460654940939477e-05, "loss": 0.6097, "step": 7459 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2458721682215505e-05, "loss": 0.7101, "step": 7460 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2456788325679374e-05, "loss": 0.5992, "step": 7461 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2454854871407993e-05, "loss": 0.6328, "step": 7462 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2452921319478281e-05, "loss": 0.6675, "step": 7463 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2450987669967157e-05, "loss": 0.6789, "step": 7464 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2449053922951542e-05, "loss": 0.7306, "step": 7465 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.244712007850837e-05, "loss": 0.727, "step": 7466 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2445186136714566e-05, "loss": 0.6302, "step": 7467 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.244325209764707e-05, "loss": 0.6292, "step": 7468 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2441317961382823e-05, "loss": 0.6186, "step": 7469 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2439383727998762e-05, "loss": 0.7318, "step": 7470 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2437449397571837e-05, "loss": 0.6252, "step": 7471 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2435514970179001e-05, "loss": 0.6543, "step": 7472 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2433580445897204e-05, "loss": 0.8121, "step": 7473 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2431645824803414e-05, "loss": 0.7327, "step": 7474 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2429711106974582e-05, "loss": 0.6897, "step": 7475 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.242777629248768e-05, "loss": 0.6607, "step": 7476 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2425841381419683e-05, "loss": 0.6174, "step": 7477 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2423906373847555e-05, "loss": 0.8055, "step": 7478 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2421971269848281e-05, "loss": 0.6191, "step": 7479 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2420036069498841e-05, "loss": 0.6139, "step": 7480 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2418100772876223e-05, "loss": 0.7044, "step": 7481 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.241616538005741e-05, "loss": 0.7936, "step": 7482 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2414229891119401e-05, "loss": 0.6243, "step": 7483 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2412294306139188e-05, "loss": 0.6183, "step": 7484 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2410358625193779e-05, "loss": 0.5707, "step": 7485 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2408422848360171e-05, "loss": 0.6101, "step": 7486 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2406486975715379e-05, "loss": 0.7253, "step": 7487 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2404551007336412e-05, "loss": 0.5696, "step": 7488 }, { "epoch": 1.35, "grad_norm": 0.0, "learning_rate": 1.2402614943300287e-05, "loss": 0.7032, "step": 7489 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2400678783684023e-05, "loss": 0.6518, "step": 7490 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2398742528564645e-05, "loss": 0.5861, "step": 7491 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2396806178019181e-05, "loss": 0.5968, "step": 7492 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.239486973212466e-05, "loss": 0.5716, "step": 7493 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2392933190958114e-05, "loss": 0.6761, "step": 7494 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.239099655459659e-05, "loss": 0.6201, "step": 7495 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.238905982311713e-05, "loss": 0.628, "step": 7496 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2387122996596774e-05, "loss": 0.5807, "step": 7497 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.238518607511258e-05, "loss": 0.6171, "step": 7498 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2383249058741594e-05, "loss": 0.6397, "step": 7499 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2381311947560879e-05, "loss": 0.5497, "step": 7500 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2379374741647494e-05, "loss": 0.6884, "step": 7501 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2377437441078503e-05, "loss": 0.7079, "step": 7502 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2375500045930984e-05, "loss": 0.5803, "step": 7503 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2373562556282002e-05, "loss": 0.6213, "step": 7504 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2371624972208634e-05, "loss": 0.6803, "step": 7505 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2369687293787962e-05, "loss": 0.6492, "step": 7506 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.236774952109707e-05, "loss": 0.6663, "step": 7507 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2365811654213047e-05, "loss": 0.6908, "step": 7508 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2363873693212982e-05, "loss": 0.5272, "step": 7509 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2361935638173974e-05, "loss": 0.7767, "step": 7510 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2359997489173118e-05, "loss": 0.8022, "step": 7511 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2358059246287519e-05, "loss": 0.642, "step": 7512 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2356120909594282e-05, "loss": 0.6762, "step": 7513 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2354182479170523e-05, "loss": 0.7128, "step": 7514 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2352243955093353e-05, "loss": 0.5166, "step": 7515 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2350305337439882e-05, "loss": 0.6595, "step": 7516 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2348366626287244e-05, "loss": 0.7359, "step": 7517 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2346427821712557e-05, "loss": 0.7061, "step": 7518 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2344488923792952e-05, "loss": 0.6826, "step": 7519 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.234254993260556e-05, "loss": 0.5782, "step": 7520 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2340610848227519e-05, "loss": 0.6637, "step": 7521 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2338671670735968e-05, "loss": 0.6003, "step": 7522 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2336732400208057e-05, "loss": 0.6834, "step": 7523 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2334793036720923e-05, "loss": 0.6214, "step": 7524 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2332853580351722e-05, "loss": 0.6018, "step": 7525 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2330914031177612e-05, "loss": 0.5958, "step": 7526 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2328974389275747e-05, "loss": 0.6796, "step": 7527 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.232703465472329e-05, "loss": 0.6416, "step": 7528 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.232509482759741e-05, "loss": 0.5778, "step": 7529 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2323154907975274e-05, "loss": 0.695, "step": 7530 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2321214895934056e-05, "loss": 0.6645, "step": 7531 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2319274791550932e-05, "loss": 0.5784, "step": 7532 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2317334594903085e-05, "loss": 0.5553, "step": 7533 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2315394306067701e-05, "loss": 0.6187, "step": 7534 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2313453925121958e-05, "loss": 0.7045, "step": 7535 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.231151345214306e-05, "loss": 0.5952, "step": 7536 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2309572887208194e-05, "loss": 0.6655, "step": 7537 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2307632230394564e-05, "loss": 0.6373, "step": 7538 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.230569148177937e-05, "loss": 0.626, "step": 7539 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2303750641439818e-05, "loss": 0.5803, "step": 7540 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.230180970945312e-05, "loss": 0.6928, "step": 7541 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.229986868589649e-05, "loss": 0.6855, "step": 7542 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2297927570847142e-05, "loss": 0.6284, "step": 7543 }, { "epoch": 1.36, "grad_norm": 0.0, "learning_rate": 1.2295986364382297e-05, "loss": 0.5753, "step": 7544 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2294045066579186e-05, "loss": 0.5708, "step": 7545 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2292103677515027e-05, "loss": 0.6218, "step": 7546 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.229016219726706e-05, "loss": 0.6491, "step": 7547 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2288220625912516e-05, "loss": 0.5723, "step": 7548 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2286278963528639e-05, "loss": 0.7166, "step": 7549 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2284337210192667e-05, "loss": 0.5484, "step": 7550 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2282395365981843e-05, "loss": 0.6799, "step": 7551 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2280453430973425e-05, "loss": 0.6226, "step": 7552 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2278511405244665e-05, "loss": 0.6352, "step": 7553 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2276569288872815e-05, "loss": 0.8184, "step": 7554 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.227462708193514e-05, "loss": 0.7145, "step": 7555 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2272684784508902e-05, "loss": 0.6425, "step": 7556 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.227074239667137e-05, "loss": 0.73, "step": 7557 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2268799918499816e-05, "loss": 0.6259, "step": 7558 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2266857350071512e-05, "loss": 0.668, "step": 7559 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2264914691463742e-05, "loss": 0.6266, "step": 7560 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2262971942753788e-05, "loss": 0.7761, "step": 7561 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2261029104018928e-05, "loss": 0.6672, "step": 7562 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2259086175336461e-05, "loss": 0.6157, "step": 7563 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2257143156783675e-05, "loss": 0.6522, "step": 7564 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2255200048437865e-05, "loss": 0.6984, "step": 7565 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2253256850376337e-05, "loss": 0.7231, "step": 7566 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2251313562676389e-05, "loss": 0.677, "step": 7567 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2249370185415334e-05, "loss": 0.6183, "step": 7568 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2247426718670476e-05, "loss": 0.6044, "step": 7569 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2245483162519134e-05, "loss": 0.6209, "step": 7570 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2243539517038625e-05, "loss": 0.7108, "step": 7571 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2241595782306272e-05, "loss": 0.6462, "step": 7572 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.22396519583994e-05, "loss": 0.5935, "step": 7573 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2237708045395333e-05, "loss": 0.533, "step": 7574 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2235764043371408e-05, "loss": 0.6595, "step": 7575 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2233819952404959e-05, "loss": 0.6201, "step": 7576 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2231875772573326e-05, "loss": 0.6955, "step": 7577 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2229931503953849e-05, "loss": 0.7025, "step": 7578 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2227987146623878e-05, "loss": 0.7102, "step": 7579 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2226042700660766e-05, "loss": 0.5786, "step": 7580 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2224098166141855e-05, "loss": 0.6239, "step": 7581 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2222153543144513e-05, "loss": 0.6181, "step": 7582 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2220208831746097e-05, "loss": 0.5736, "step": 7583 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2218264032023967e-05, "loss": 0.6643, "step": 7584 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2216319144055497e-05, "loss": 0.6887, "step": 7585 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2214374167918051e-05, "loss": 0.5723, "step": 7586 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2212429103689013e-05, "loss": 0.6346, "step": 7587 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2210483951445751e-05, "loss": 0.6781, "step": 7588 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2208538711265647e-05, "loss": 0.6881, "step": 7589 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2206593383226095e-05, "loss": 0.6613, "step": 7590 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2204647967404478e-05, "loss": 0.6734, "step": 7591 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2202702463878186e-05, "loss": 0.5844, "step": 7592 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2200756872724617e-05, "loss": 0.6432, "step": 7593 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.219881119402117e-05, "loss": 0.5957, "step": 7594 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2196865427845248e-05, "loss": 0.5793, "step": 7595 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2194919574274253e-05, "loss": 0.7071, "step": 7596 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.21929736333856e-05, "loss": 0.6246, "step": 7597 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2191027605256696e-05, "loss": 0.5257, "step": 7598 }, { "epoch": 1.37, "grad_norm": 0.0, "learning_rate": 1.2189081489964964e-05, "loss": 0.6169, "step": 7599 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2187135287587818e-05, "loss": 0.7644, "step": 7600 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2185188998202684e-05, "loss": 0.7542, "step": 7601 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2183242621886988e-05, "loss": 0.6901, "step": 7602 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.218129615871816e-05, "loss": 0.6732, "step": 7603 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2179349608773636e-05, "loss": 0.6413, "step": 7604 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2177402972130847e-05, "loss": 0.6659, "step": 7605 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2175456248867243e-05, "loss": 0.6371, "step": 7606 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2173509439060261e-05, "loss": 0.7239, "step": 7607 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2171562542787347e-05, "loss": 0.673, "step": 7608 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2169615560125958e-05, "loss": 0.6982, "step": 7609 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2167668491153546e-05, "loss": 0.5286, "step": 7610 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2165721335947567e-05, "loss": 0.6407, "step": 7611 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2163774094585482e-05, "loss": 0.6744, "step": 7612 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2161826767144756e-05, "loss": 0.7028, "step": 7613 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2159879353702862e-05, "loss": 0.6945, "step": 7614 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2157931854337264e-05, "loss": 0.644, "step": 7615 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2155984269125439e-05, "loss": 0.6453, "step": 7616 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.215403659814487e-05, "loss": 0.5978, "step": 7617 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.215208884147303e-05, "loss": 0.6941, "step": 7618 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2150140999187412e-05, "loss": 0.7329, "step": 7619 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2148193071365503e-05, "loss": 0.6594, "step": 7620 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2146245058084794e-05, "loss": 0.5954, "step": 7621 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2144296959422777e-05, "loss": 0.6239, "step": 7622 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2142348775456956e-05, "loss": 0.5806, "step": 7623 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2140400506264826e-05, "loss": 0.6689, "step": 7624 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2138452151923903e-05, "loss": 0.6195, "step": 7625 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2136503712511689e-05, "loss": 0.5319, "step": 7626 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2134555188105691e-05, "loss": 0.6343, "step": 7627 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.213260657878344e-05, "loss": 0.6349, "step": 7628 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2130657884622441e-05, "loss": 0.6809, "step": 7629 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2128709105700223e-05, "loss": 0.8107, "step": 7630 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2126760242094308e-05, "loss": 0.6279, "step": 7631 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2124811293882226e-05, "loss": 0.6879, "step": 7632 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2122862261141518e-05, "loss": 0.6048, "step": 7633 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2120913143949707e-05, "loss": 0.5872, "step": 7634 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2118963942384336e-05, "loss": 0.7883, "step": 7635 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2117014656522955e-05, "loss": 0.7295, "step": 7636 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2115065286443098e-05, "loss": 0.5631, "step": 7637 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2113115832222324e-05, "loss": 0.7097, "step": 7638 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2111166293938182e-05, "loss": 0.6278, "step": 7639 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.210921667166823e-05, "loss": 0.5918, "step": 7640 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2107266965490023e-05, "loss": 0.7086, "step": 7641 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2105317175481126e-05, "loss": 0.703, "step": 7642 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2103367301719104e-05, "loss": 0.5955, "step": 7643 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2101417344281534e-05, "loss": 0.6198, "step": 7644 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2099467303245977e-05, "loss": 0.7389, "step": 7645 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2097517178690012e-05, "loss": 0.7457, "step": 7646 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2095566970691223e-05, "loss": 0.567, "step": 7647 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.209361667932719e-05, "loss": 0.7068, "step": 7648 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.20916663046755e-05, "loss": 0.6581, "step": 7649 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2089715846813742e-05, "loss": 0.5366, "step": 7650 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2087765305819503e-05, "loss": 0.577, "step": 7651 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2085814681770389e-05, "loss": 0.6414, "step": 7652 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2083863974743993e-05, "loss": 0.6737, "step": 7653 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2081913184817913e-05, "loss": 0.5958, "step": 7654 }, { "epoch": 1.38, "grad_norm": 0.0, "learning_rate": 1.2079962312069766e-05, "loss": 0.6586, "step": 7655 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.207801135657715e-05, "loss": 0.7853, "step": 7656 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2076060318417686e-05, "loss": 0.7398, "step": 7657 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2074109197668985e-05, "loss": 0.7122, "step": 7658 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2072157994408669e-05, "loss": 0.6224, "step": 7659 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2070206708714356e-05, "loss": 0.688, "step": 7660 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2068255340663674e-05, "loss": 0.7413, "step": 7661 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.206630389033425e-05, "loss": 0.6712, "step": 7662 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2064352357803722e-05, "loss": 0.6565, "step": 7663 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2062400743149713e-05, "loss": 0.7173, "step": 7664 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2060449046449875e-05, "loss": 0.7137, "step": 7665 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2058497267781843e-05, "loss": 0.6724, "step": 7666 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2056545407223263e-05, "loss": 0.5907, "step": 7667 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2054593464851785e-05, "loss": 0.5823, "step": 7668 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2052641440745059e-05, "loss": 0.7152, "step": 7669 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2050689334980739e-05, "loss": 0.745, "step": 7670 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2048737147636484e-05, "loss": 0.661, "step": 7671 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2046784878789958e-05, "loss": 0.6518, "step": 7672 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.204483252851882e-05, "loss": 0.7113, "step": 7673 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2042880096900743e-05, "loss": 0.6684, "step": 7674 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2040927584013394e-05, "loss": 0.6763, "step": 7675 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2038974989934451e-05, "loss": 0.6215, "step": 7676 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.203702231474159e-05, "loss": 0.7062, "step": 7677 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2035069558512493e-05, "loss": 0.5689, "step": 7678 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.203311672132484e-05, "loss": 0.6207, "step": 7679 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2031163803256323e-05, "loss": 0.6752, "step": 7680 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2029210804384627e-05, "loss": 0.5691, "step": 7681 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2027257724787455e-05, "loss": 0.7343, "step": 7682 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2025304564542493e-05, "loss": 0.7266, "step": 7683 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2023351323727446e-05, "loss": 0.6454, "step": 7684 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.202139800242002e-05, "loss": 0.6203, "step": 7685 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2019444600697917e-05, "loss": 0.7079, "step": 7686 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2017491118638848e-05, "loss": 0.6218, "step": 7687 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2015537556320526e-05, "loss": 0.6566, "step": 7688 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2013583913820668e-05, "loss": 0.5831, "step": 7689 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2011630191216992e-05, "loss": 0.6626, "step": 7690 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.200967638858722e-05, "loss": 0.6584, "step": 7691 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2007722506009079e-05, "loss": 0.6265, "step": 7692 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2005768543560299e-05, "loss": 0.5268, "step": 7693 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.200381450131861e-05, "loss": 0.6252, "step": 7694 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.2001860379361746e-05, "loss": 0.7554, "step": 7695 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1999906177767447e-05, "loss": 0.6803, "step": 7696 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1997951896613454e-05, "loss": 0.5529, "step": 7697 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1995997535977513e-05, "loss": 0.5675, "step": 7698 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1994043095937369e-05, "loss": 0.6253, "step": 7699 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1992088576570772e-05, "loss": 0.6772, "step": 7700 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1990133977955487e-05, "loss": 0.5986, "step": 7701 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1988179300169256e-05, "loss": 0.5985, "step": 7702 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1986224543289849e-05, "loss": 0.5954, "step": 7703 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1984269707395026e-05, "loss": 0.685, "step": 7704 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1982314792562558e-05, "loss": 0.5845, "step": 7705 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.198035979887021e-05, "loss": 0.7033, "step": 7706 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1978404726395757e-05, "loss": 0.6884, "step": 7707 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1976449575216973e-05, "loss": 0.6762, "step": 7708 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1974494345411637e-05, "loss": 0.6775, "step": 7709 }, { "epoch": 1.39, "grad_norm": 0.0, "learning_rate": 1.1972539037057538e-05, "loss": 0.4707, "step": 7710 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1970583650232454e-05, "loss": 0.585, "step": 7711 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1968628185014182e-05, "loss": 0.6457, "step": 7712 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1966672641480501e-05, "loss": 0.591, "step": 7713 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1964717019709217e-05, "loss": 0.7809, "step": 7714 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1962761319778124e-05, "loss": 0.7327, "step": 7715 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1960805541765024e-05, "loss": 0.5608, "step": 7716 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1958849685747717e-05, "loss": 0.6152, "step": 7717 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1956893751804018e-05, "loss": 0.558, "step": 7718 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.195493774001173e-05, "loss": 0.7657, "step": 7719 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1952981650448674e-05, "loss": 0.625, "step": 7720 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1951025483192658e-05, "loss": 0.6903, "step": 7721 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1949069238321508e-05, "loss": 0.6668, "step": 7722 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1947112915913045e-05, "loss": 0.6693, "step": 7723 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.194515651604509e-05, "loss": 0.6872, "step": 7724 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1943200038795482e-05, "loss": 0.6291, "step": 7725 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1941243484242047e-05, "loss": 0.5928, "step": 7726 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.193928685246262e-05, "loss": 0.6805, "step": 7727 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.193733014353504e-05, "loss": 0.655, "step": 7728 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1935373357537146e-05, "loss": 0.606, "step": 7729 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1933416494546785e-05, "loss": 0.6672, "step": 7730 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1931459554641806e-05, "loss": 0.5482, "step": 7731 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1929502537900056e-05, "loss": 0.6551, "step": 7732 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1927545444399389e-05, "loss": 0.583, "step": 7733 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1925588274217664e-05, "loss": 0.4929, "step": 7734 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1923631027432736e-05, "loss": 0.6083, "step": 7735 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1921673704122473e-05, "loss": 0.7076, "step": 7736 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1919716304364736e-05, "loss": 0.6958, "step": 7737 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1917758828237394e-05, "loss": 0.5778, "step": 7738 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1915801275818325e-05, "loss": 0.5932, "step": 7739 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1913843647185397e-05, "loss": 0.6464, "step": 7740 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.191188594241649e-05, "loss": 0.5629, "step": 7741 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1909928161589488e-05, "loss": 0.6634, "step": 7742 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1907970304782266e-05, "loss": 0.653, "step": 7743 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1906012372072722e-05, "loss": 0.6474, "step": 7744 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1904054363538737e-05, "loss": 0.6675, "step": 7745 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1902096279258211e-05, "loss": 0.5994, "step": 7746 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1900138119309035e-05, "loss": 0.6888, "step": 7747 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1898179883769108e-05, "loss": 0.662, "step": 7748 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1896221572716334e-05, "loss": 0.514, "step": 7749 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.189426318622862e-05, "loss": 0.6639, "step": 7750 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1892304724383868e-05, "loss": 0.5711, "step": 7751 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1890346187259994e-05, "loss": 0.7646, "step": 7752 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.188838757493491e-05, "loss": 0.7207, "step": 7753 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1886428887486535e-05, "loss": 0.6763, "step": 7754 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1884470124992784e-05, "loss": 0.58, "step": 7755 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1882511287531584e-05, "loss": 0.6992, "step": 7756 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.188055237518086e-05, "loss": 0.7063, "step": 7757 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1878593388018545e-05, "loss": 0.6551, "step": 7758 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1876634326122562e-05, "loss": 0.6858, "step": 7759 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1874675189570852e-05, "loss": 0.6701, "step": 7760 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.187271597844135e-05, "loss": 0.6373, "step": 7761 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1870756692812003e-05, "loss": 0.6684, "step": 7762 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1868797332760746e-05, "loss": 0.6382, "step": 7763 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1866837898365533e-05, "loss": 0.6943, "step": 7764 }, { "epoch": 1.4, "grad_norm": 0.0, "learning_rate": 1.1864878389704306e-05, "loss": 0.6273, "step": 7765 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1862918806855026e-05, "loss": 0.7114, "step": 7766 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1860959149895641e-05, "loss": 0.5856, "step": 7767 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1858999418904113e-05, "loss": 0.6774, "step": 7768 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1857039613958408e-05, "loss": 0.6614, "step": 7769 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1855079735136482e-05, "loss": 0.6098, "step": 7770 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1853119782516306e-05, "loss": 0.5898, "step": 7771 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1851159756175852e-05, "loss": 0.6873, "step": 7772 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1849199656193091e-05, "loss": 0.7389, "step": 7773 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1847239482646e-05, "loss": 0.6566, "step": 7774 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1845279235612557e-05, "loss": 0.6167, "step": 7775 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1843318915170747e-05, "loss": 0.603, "step": 7776 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1841358521398554e-05, "loss": 0.5734, "step": 7777 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1839398054373959e-05, "loss": 0.7262, "step": 7778 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1837437514174961e-05, "loss": 0.6429, "step": 7779 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1835476900879551e-05, "loss": 0.5864, "step": 7780 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1833516214565727e-05, "loss": 0.6505, "step": 7781 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1831555455311487e-05, "loss": 0.6674, "step": 7782 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1829594623194833e-05, "loss": 0.5589, "step": 7783 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.182763371829377e-05, "loss": 0.4982, "step": 7784 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1825672740686307e-05, "loss": 0.6548, "step": 7785 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1823711690450452e-05, "loss": 0.5767, "step": 7786 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1821750567664225e-05, "loss": 0.7339, "step": 7787 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1819789372405641e-05, "loss": 0.5691, "step": 7788 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1817828104752716e-05, "loss": 0.6383, "step": 7789 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1815866764783475e-05, "loss": 0.6615, "step": 7790 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1813905352575942e-05, "loss": 0.5565, "step": 7791 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.181194386820815e-05, "loss": 0.6899, "step": 7792 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1809982311758123e-05, "loss": 0.6708, "step": 7793 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.18080206833039e-05, "loss": 0.7246, "step": 7794 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1806058982923521e-05, "loss": 0.8117, "step": 7795 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1804097210695019e-05, "loss": 0.5734, "step": 7796 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1802135366696437e-05, "loss": 0.7025, "step": 7797 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1800173451005826e-05, "loss": 0.5692, "step": 7798 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1798211463701228e-05, "loss": 0.6083, "step": 7799 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.17962494048607e-05, "loss": 0.7919, "step": 7800 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1794287274562295e-05, "loss": 0.6359, "step": 7801 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1792325072884064e-05, "loss": 0.7299, "step": 7802 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1790362799904076e-05, "loss": 0.6696, "step": 7803 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1788400455700385e-05, "loss": 0.5997, "step": 7804 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1786438040351062e-05, "loss": 0.6687, "step": 7805 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1784475553934174e-05, "loss": 0.7645, "step": 7806 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.178251299652779e-05, "loss": 0.7217, "step": 7807 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1780550368209986e-05, "loss": 0.6372, "step": 7808 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1778587669058839e-05, "loss": 0.5948, "step": 7809 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1776624899152427e-05, "loss": 0.6161, "step": 7810 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.177466205856883e-05, "loss": 0.67, "step": 7811 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1772699147386142e-05, "loss": 0.6391, "step": 7812 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1770736165682439e-05, "loss": 0.6829, "step": 7813 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1768773113535823e-05, "loss": 0.6907, "step": 7814 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1766809991024381e-05, "loss": 0.6487, "step": 7815 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1764846798226208e-05, "loss": 0.6874, "step": 7816 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1762883535219407e-05, "loss": 0.6357, "step": 7817 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1760920202082082e-05, "loss": 0.598, "step": 7818 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1758956798892331e-05, "loss": 0.6274, "step": 7819 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.175699332572827e-05, "loss": 0.6232, "step": 7820 }, { "epoch": 1.41, "grad_norm": 0.0, "learning_rate": 1.1755029782667999e-05, "loss": 0.6337, "step": 7821 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1753066169789643e-05, "loss": 0.6395, "step": 7822 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1751102487171307e-05, "loss": 0.737, "step": 7823 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1749138734891114e-05, "loss": 0.7552, "step": 7824 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1747174913027185e-05, "loss": 0.6437, "step": 7825 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1745211021657647e-05, "loss": 0.556, "step": 7826 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1743247060860627e-05, "loss": 0.6543, "step": 7827 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.174128303071425e-05, "loss": 0.6862, "step": 7828 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1739318931296653e-05, "loss": 0.6165, "step": 7829 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1737354762685967e-05, "loss": 0.5924, "step": 7830 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1735390524960333e-05, "loss": 0.6177, "step": 7831 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1733426218197889e-05, "loss": 0.653, "step": 7832 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1731461842476785e-05, "loss": 0.5594, "step": 7833 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.172949739787516e-05, "loss": 0.6849, "step": 7834 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1727532884471165e-05, "loss": 0.7548, "step": 7835 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1725568302342953e-05, "loss": 0.5771, "step": 7836 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1723603651568677e-05, "loss": 0.6299, "step": 7837 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1721638932226498e-05, "loss": 0.6339, "step": 7838 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.171967414439457e-05, "loss": 0.5792, "step": 7839 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.171770928815106e-05, "loss": 0.6803, "step": 7840 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1715744363574133e-05, "loss": 0.62, "step": 7841 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1713779370741956e-05, "loss": 0.4997, "step": 7842 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1711814309732697e-05, "loss": 0.6111, "step": 7843 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1709849180624533e-05, "loss": 0.6297, "step": 7844 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.170788398349564e-05, "loss": 0.6304, "step": 7845 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1705918718424196e-05, "loss": 0.634, "step": 7846 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1703953385488384e-05, "loss": 0.6339, "step": 7847 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1701987984766388e-05, "loss": 0.6695, "step": 7848 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1700022516336394e-05, "loss": 0.6684, "step": 7849 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.169805698027659e-05, "loss": 0.629, "step": 7850 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.169609137666517e-05, "loss": 0.5972, "step": 7851 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1694125705580333e-05, "loss": 0.6905, "step": 7852 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1692159967100272e-05, "loss": 0.6152, "step": 7853 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1690194161303185e-05, "loss": 0.6293, "step": 7854 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1688228288267283e-05, "loss": 0.6084, "step": 7855 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1686262348070768e-05, "loss": 0.6512, "step": 7856 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1684296340791844e-05, "loss": 0.6984, "step": 7857 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.168233026650873e-05, "loss": 0.6474, "step": 7858 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1680364125299633e-05, "loss": 0.6518, "step": 7859 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1678397917242776e-05, "loss": 0.7131, "step": 7860 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1676431642416373e-05, "loss": 0.5982, "step": 7861 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1674465300898645e-05, "loss": 0.5435, "step": 7862 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.167249889276782e-05, "loss": 0.641, "step": 7863 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1670532418102126e-05, "loss": 0.6598, "step": 7864 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.166856587697979e-05, "loss": 0.6824, "step": 7865 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1666599269479045e-05, "loss": 0.6247, "step": 7866 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1664632595678125e-05, "loss": 0.679, "step": 7867 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1662665855655269e-05, "loss": 0.6573, "step": 7868 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1660699049488717e-05, "loss": 0.6002, "step": 7869 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1658732177256711e-05, "loss": 0.595, "step": 7870 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.16567652390375e-05, "loss": 0.7079, "step": 7871 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1654798234909326e-05, "loss": 0.7305, "step": 7872 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1652831164950445e-05, "loss": 0.6381, "step": 7873 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.165086402923911e-05, "loss": 0.6121, "step": 7874 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.1648896827853575e-05, "loss": 0.707, "step": 7875 }, { "epoch": 1.42, "grad_norm": 0.0, "learning_rate": 1.16469295608721e-05, "loss": 0.6655, "step": 7876 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1644962228372944e-05, "loss": 0.6463, "step": 7877 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1642994830434373e-05, "loss": 0.641, "step": 7878 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1641027367134657e-05, "loss": 0.6928, "step": 7879 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1639059838552057e-05, "loss": 0.6622, "step": 7880 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1637092244764846e-05, "loss": 0.7009, "step": 7881 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1635124585851307e-05, "loss": 0.6197, "step": 7882 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1633156861889707e-05, "loss": 0.6275, "step": 7883 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.163118907295833e-05, "loss": 0.6331, "step": 7884 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.162922121913546e-05, "loss": 0.6157, "step": 7885 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1627253300499378e-05, "loss": 0.7088, "step": 7886 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1625285317128369e-05, "loss": 0.7295, "step": 7887 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1623317269100728e-05, "loss": 0.6539, "step": 7888 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1621349156494745e-05, "loss": 0.7087, "step": 7889 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1619380979388718e-05, "loss": 0.6086, "step": 7890 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1617412737860935e-05, "loss": 0.6655, "step": 7891 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1615444431989706e-05, "loss": 0.6159, "step": 7892 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.161347606185333e-05, "loss": 0.6215, "step": 7893 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1611507627530113e-05, "loss": 0.6315, "step": 7894 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1609539129098361e-05, "loss": 0.6637, "step": 7895 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1607570566636387e-05, "loss": 0.6279, "step": 7896 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1605601940222498e-05, "loss": 0.7221, "step": 7897 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1603633249935018e-05, "loss": 0.6031, "step": 7898 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1601664495852258e-05, "loss": 0.7498, "step": 7899 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1599695678052539e-05, "loss": 0.64, "step": 7900 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1597726796614189e-05, "loss": 0.7086, "step": 7901 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1595757851615525e-05, "loss": 0.6867, "step": 7902 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1593788843134883e-05, "loss": 0.6677, "step": 7903 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.159181977125059e-05, "loss": 0.7102, "step": 7904 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.158985063604098e-05, "loss": 0.589, "step": 7905 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1587881437584387e-05, "loss": 0.6005, "step": 7906 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1585912175959153e-05, "loss": 0.604, "step": 7907 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1583942851243613e-05, "loss": 0.695, "step": 7908 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1581973463516113e-05, "loss": 0.7122, "step": 7909 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1580004012854999e-05, "loss": 0.6874, "step": 7910 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1578034499338619e-05, "loss": 0.631, "step": 7911 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1576064923045326e-05, "loss": 0.6215, "step": 7912 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1574095284053466e-05, "loss": 0.6815, "step": 7913 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1572125582441402e-05, "loss": 0.6616, "step": 7914 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1570155818287487e-05, "loss": 0.5248, "step": 7915 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1568185991670082e-05, "loss": 0.6574, "step": 7916 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1566216102667557e-05, "loss": 0.7937, "step": 7917 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.156424615135827e-05, "loss": 0.5326, "step": 7918 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1562276137820589e-05, "loss": 0.7536, "step": 7919 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1560306062132894e-05, "loss": 0.5733, "step": 7920 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1558335924373543e-05, "loss": 0.6734, "step": 7921 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1556365724620924e-05, "loss": 0.5868, "step": 7922 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1554395462953406e-05, "loss": 0.6274, "step": 7923 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1552425139449377e-05, "loss": 0.7004, "step": 7924 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1550454754187218e-05, "loss": 0.6067, "step": 7925 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.154848430724531e-05, "loss": 0.5657, "step": 7926 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1546513798702041e-05, "loss": 0.6257, "step": 7927 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1544543228635812e-05, "loss": 0.6627, "step": 7928 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1542572597124999e-05, "loss": 0.6001, "step": 7929 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.154060190424801e-05, "loss": 0.6351, "step": 7930 }, { "epoch": 1.43, "grad_norm": 0.0, "learning_rate": 1.1538631150083237e-05, "loss": 0.6299, "step": 7931 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.153666033470908e-05, "loss": 0.6238, "step": 7932 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1534689458203945e-05, "loss": 0.6205, "step": 7933 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1532718520646231e-05, "loss": 0.6946, "step": 7934 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1530747522114351e-05, "loss": 0.6692, "step": 7935 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1528776462686712e-05, "loss": 0.626, "step": 7936 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1526805342441726e-05, "loss": 0.7302, "step": 7937 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1524834161457805e-05, "loss": 0.6289, "step": 7938 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1522862919813376e-05, "loss": 0.6338, "step": 7939 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1520891617586845e-05, "loss": 0.6947, "step": 7940 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.151892025485664e-05, "loss": 0.6379, "step": 7941 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.151694883170119e-05, "loss": 0.7628, "step": 7942 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1514977348198913e-05, "loss": 0.6129, "step": 7943 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1513005804428244e-05, "loss": 0.5899, "step": 7944 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.151103420046761e-05, "loss": 0.6573, "step": 7945 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1509062536395446e-05, "loss": 0.5951, "step": 7946 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1507090812290193e-05, "loss": 0.7258, "step": 7947 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1505119028230283e-05, "loss": 0.5903, "step": 7948 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.150314718429416e-05, "loss": 0.6192, "step": 7949 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1501175280560266e-05, "loss": 0.7373, "step": 7950 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.149920331710705e-05, "loss": 0.5536, "step": 7951 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1497231294012956e-05, "loss": 0.6445, "step": 7952 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1495259211356438e-05, "loss": 0.6507, "step": 7953 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1493287069215946e-05, "loss": 0.5768, "step": 7954 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1491314867669936e-05, "loss": 0.6553, "step": 7955 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1489342606796865e-05, "loss": 0.6181, "step": 7956 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1487370286675195e-05, "loss": 0.6072, "step": 7957 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.148539790738339e-05, "loss": 0.6015, "step": 7958 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1483425468999904e-05, "loss": 0.5924, "step": 7959 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1481452971603217e-05, "loss": 0.5896, "step": 7960 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1479480415271793e-05, "loss": 0.6193, "step": 7961 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1477507800084102e-05, "loss": 0.6649, "step": 7962 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.147553512611862e-05, "loss": 0.6633, "step": 7963 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1473562393453822e-05, "loss": 0.6467, "step": 7964 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.147158960216819e-05, "loss": 0.5625, "step": 7965 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1469616752340203e-05, "loss": 0.571, "step": 7966 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.146764384404834e-05, "loss": 0.6405, "step": 7967 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1465670877371093e-05, "loss": 0.7182, "step": 7968 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1463697852386948e-05, "loss": 0.7371, "step": 7969 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1461724769174393e-05, "loss": 0.6558, "step": 7970 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1459751627811926e-05, "loss": 0.6191, "step": 7971 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1457778428378037e-05, "loss": 0.6354, "step": 7972 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1455805170951223e-05, "loss": 0.5177, "step": 7973 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.145383185560999e-05, "loss": 0.7537, "step": 7974 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1451858482432829e-05, "loss": 0.6761, "step": 7975 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1449885051498256e-05, "loss": 0.7007, "step": 7976 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.144791156288477e-05, "loss": 0.6394, "step": 7977 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.144593801667088e-05, "loss": 0.669, "step": 7978 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.14439644129351e-05, "loss": 0.7411, "step": 7979 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.144199075175594e-05, "loss": 0.6696, "step": 7980 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1440017033211922e-05, "loss": 0.59, "step": 7981 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1438043257381556e-05, "loss": 0.5473, "step": 7982 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1436069424343365e-05, "loss": 0.7151, "step": 7983 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1434095534175872e-05, "loss": 0.6638, "step": 7984 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1432121586957608e-05, "loss": 0.6327, "step": 7985 }, { "epoch": 1.44, "grad_norm": 0.0, "learning_rate": 1.1430147582767088e-05, "loss": 0.6951, "step": 7986 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1428173521682848e-05, "loss": 0.5167, "step": 7987 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.142619940378342e-05, "loss": 0.6831, "step": 7988 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1424225229147337e-05, "loss": 0.6856, "step": 7989 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1422250997853132e-05, "loss": 0.7113, "step": 7990 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.142027670997935e-05, "loss": 0.6722, "step": 7991 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1418302365604524e-05, "loss": 0.645, "step": 7992 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1416327964807202e-05, "loss": 0.6811, "step": 7993 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1414353507665922e-05, "loss": 0.6615, "step": 7994 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1412378994259243e-05, "loss": 0.8157, "step": 7995 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1410404424665709e-05, "loss": 0.6111, "step": 7996 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1408429798963865e-05, "loss": 0.6353, "step": 7997 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1406455117232274e-05, "loss": 0.652, "step": 7998 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1404480379549488e-05, "loss": 0.6747, "step": 7999 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1402505585994068e-05, "loss": 0.6717, "step": 8000 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1400530736644573e-05, "loss": 0.7323, "step": 8001 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1398555831579563e-05, "loss": 0.7161, "step": 8002 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.139658087087761e-05, "loss": 0.6299, "step": 8003 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1394605854617278e-05, "loss": 0.588, "step": 8004 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.139263078287713e-05, "loss": 0.6706, "step": 8005 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.139065565573575e-05, "loss": 0.7286, "step": 8006 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1388680473271702e-05, "loss": 0.6542, "step": 8007 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1386705235563569e-05, "loss": 0.7013, "step": 8008 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1384729942689922e-05, "loss": 0.6165, "step": 8009 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1382754594729349e-05, "loss": 0.6437, "step": 8010 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.138077919176043e-05, "loss": 0.6821, "step": 8011 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1378803733861748e-05, "loss": 0.6398, "step": 8012 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1376828221111889e-05, "loss": 0.7093, "step": 8013 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1374852653589447e-05, "loss": 0.6547, "step": 8014 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.137287703137301e-05, "loss": 0.7057, "step": 8015 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1370901354541176e-05, "loss": 0.6023, "step": 8016 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1368925623172535e-05, "loss": 0.5353, "step": 8017 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.136694983734569e-05, "loss": 0.5979, "step": 8018 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1364973997139237e-05, "loss": 0.7899, "step": 8019 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1362998102631781e-05, "loss": 0.6476, "step": 8020 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1361022153901921e-05, "loss": 0.6296, "step": 8021 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1359046151028272e-05, "loss": 0.5566, "step": 8022 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1357070094089443e-05, "loss": 0.6292, "step": 8023 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1355093983164034e-05, "loss": 0.6834, "step": 8024 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1353117818330668e-05, "loss": 0.6985, "step": 8025 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1351141599667957e-05, "loss": 0.7639, "step": 8026 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1349165327254519e-05, "loss": 0.6727, "step": 8027 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1347189001168972e-05, "loss": 0.655, "step": 8028 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1345212621489936e-05, "loss": 0.598, "step": 8029 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1343236188296043e-05, "loss": 0.6346, "step": 8030 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1341259701665912e-05, "loss": 0.6593, "step": 8031 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.133928316167817e-05, "loss": 0.6285, "step": 8032 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.133730656841145e-05, "loss": 0.571, "step": 8033 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1335329921944384e-05, "loss": 0.7, "step": 8034 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1333353222355607e-05, "loss": 0.5944, "step": 8035 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1331376469723755e-05, "loss": 0.6943, "step": 8036 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1329399664127466e-05, "loss": 0.6125, "step": 8037 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.132742280564538e-05, "loss": 0.6643, "step": 8038 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1325445894356142e-05, "loss": 0.6016, "step": 8039 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1323468930338392e-05, "loss": 0.6662, "step": 8040 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1321491913670783e-05, "loss": 0.621, "step": 8041 }, { "epoch": 1.45, "grad_norm": 0.0, "learning_rate": 1.1319514844431967e-05, "loss": 0.5301, "step": 8042 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1317537722700582e-05, "loss": 0.5502, "step": 8043 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1315560548555294e-05, "loss": 0.6791, "step": 8044 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1313583322074752e-05, "loss": 0.6524, "step": 8045 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1311606043337617e-05, "loss": 0.5823, "step": 8046 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1309628712422545e-05, "loss": 0.5681, "step": 8047 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1307651329408197e-05, "loss": 0.6868, "step": 8048 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1305673894373246e-05, "loss": 0.7076, "step": 8049 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1303696407396345e-05, "loss": 0.8447, "step": 8050 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1301718868556168e-05, "loss": 0.6031, "step": 8051 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1299741277931387e-05, "loss": 0.6973, "step": 8052 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.129776363560067e-05, "loss": 0.6199, "step": 8053 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1295785941642692e-05, "loss": 0.7248, "step": 8054 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1293808196136129e-05, "loss": 0.571, "step": 8055 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.129183039915966e-05, "loss": 0.5773, "step": 8056 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1289852550791967e-05, "loss": 0.6092, "step": 8057 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1287874651111726e-05, "loss": 0.6864, "step": 8058 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1285896700197625e-05, "loss": 0.6564, "step": 8059 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1283918698128356e-05, "loss": 0.7011, "step": 8060 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1281940644982596e-05, "loss": 0.5873, "step": 8061 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1279962540839043e-05, "loss": 0.661, "step": 8062 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1277984385776386e-05, "loss": 0.6073, "step": 8063 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1276006179873323e-05, "loss": 0.708, "step": 8064 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1274027923208548e-05, "loss": 0.6714, "step": 8065 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.127204961586076e-05, "loss": 0.6649, "step": 8066 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1270071257908657e-05, "loss": 0.6602, "step": 8067 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1268092849430948e-05, "loss": 0.6194, "step": 8068 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1266114390506328e-05, "loss": 0.5764, "step": 8069 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1264135881213509e-05, "loss": 0.6696, "step": 8070 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1262157321631201e-05, "loss": 0.5782, "step": 8071 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1260178711838113e-05, "loss": 0.6812, "step": 8072 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1258200051912955e-05, "loss": 0.5621, "step": 8073 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1256221341934446e-05, "loss": 0.6649, "step": 8074 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1254242581981298e-05, "loss": 0.6036, "step": 8075 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1252263772132236e-05, "loss": 0.6386, "step": 8076 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1250284912465969e-05, "loss": 0.6232, "step": 8077 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.124830600306123e-05, "loss": 0.6625, "step": 8078 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1246327043996741e-05, "loss": 0.6601, "step": 8079 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1244348035351223e-05, "loss": 0.71, "step": 8080 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.124236897720341e-05, "loss": 0.6168, "step": 8081 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1240389869632034e-05, "loss": 0.7041, "step": 8082 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1238410712715822e-05, "loss": 0.589, "step": 8083 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.123643150653351e-05, "loss": 0.7522, "step": 8084 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1234452251163835e-05, "loss": 0.6435, "step": 8085 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1232472946685531e-05, "loss": 0.7069, "step": 8086 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1230493593177347e-05, "loss": 0.5711, "step": 8087 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1228514190718019e-05, "loss": 0.6321, "step": 8088 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1226534739386288e-05, "loss": 0.6174, "step": 8089 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1224555239260908e-05, "loss": 0.6655, "step": 8090 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1222575690420621e-05, "loss": 0.8044, "step": 8091 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.122059609294418e-05, "loss": 0.5794, "step": 8092 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1218616446910336e-05, "loss": 0.5785, "step": 8093 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1216636752397838e-05, "loss": 0.5377, "step": 8094 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.121465700948545e-05, "loss": 0.5337, "step": 8095 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1212677218251926e-05, "loss": 0.6205, "step": 8096 }, { "epoch": 1.46, "grad_norm": 0.0, "learning_rate": 1.1210697378776022e-05, "loss": 0.6362, "step": 8097 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1208717491136503e-05, "loss": 0.6537, "step": 8098 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1206737555412131e-05, "loss": 0.7057, "step": 8099 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1204757571681673e-05, "loss": 0.5249, "step": 8100 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1202777540023893e-05, "loss": 0.6373, "step": 8101 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1200797460517563e-05, "loss": 0.6254, "step": 8102 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1198817333241454e-05, "loss": 0.5897, "step": 8103 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1196837158274336e-05, "loss": 0.6318, "step": 8104 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1194856935694986e-05, "loss": 0.6485, "step": 8105 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1192876665582185e-05, "loss": 0.6411, "step": 8106 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1190896348014702e-05, "loss": 0.6155, "step": 8107 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.118891598307132e-05, "loss": 0.5938, "step": 8108 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1186935570830826e-05, "loss": 0.689, "step": 8109 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1184955111372002e-05, "loss": 0.6494, "step": 8110 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1182974604773633e-05, "loss": 0.7058, "step": 8111 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1180994051114509e-05, "loss": 0.6295, "step": 8112 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1179013450473417e-05, "loss": 0.6744, "step": 8113 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1177032802929154e-05, "loss": 0.6913, "step": 8114 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1175052108560504e-05, "loss": 0.7617, "step": 8115 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.117307136744627e-05, "loss": 0.5882, "step": 8116 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1171090579665249e-05, "loss": 0.5981, "step": 8117 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1169109745296236e-05, "loss": 0.654, "step": 8118 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1167128864418038e-05, "loss": 0.6465, "step": 8119 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1165147937109453e-05, "loss": 0.7148, "step": 8120 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1163166963449286e-05, "loss": 0.6565, "step": 8121 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1161185943516346e-05, "loss": 0.6701, "step": 8122 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1159204877389438e-05, "loss": 0.6987, "step": 8123 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1157223765147374e-05, "loss": 0.5772, "step": 8124 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.115524260686897e-05, "loss": 0.6305, "step": 8125 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1153261402633031e-05, "loss": 0.6277, "step": 8126 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1151280152518382e-05, "loss": 0.598, "step": 8127 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1149298856603835e-05, "loss": 0.6412, "step": 8128 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.114731751496821e-05, "loss": 0.6211, "step": 8129 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1145336127690329e-05, "loss": 0.6281, "step": 8130 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1143354694849016e-05, "loss": 0.6959, "step": 8131 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1141373216523093e-05, "loss": 0.7342, "step": 8132 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1139391692791387e-05, "loss": 0.6334, "step": 8133 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.113741012373273e-05, "loss": 0.5538, "step": 8134 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1135428509425947e-05, "loss": 0.6324, "step": 8135 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1133446849949878e-05, "loss": 0.5964, "step": 8136 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1131465145383347e-05, "loss": 0.609, "step": 8137 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1129483395805194e-05, "loss": 0.8418, "step": 8138 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.112750160129426e-05, "loss": 0.6378, "step": 8139 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1125519761929378e-05, "loss": 0.637, "step": 8140 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1123537877789392e-05, "loss": 0.7046, "step": 8141 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1121555948953146e-05, "loss": 0.6337, "step": 8142 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.111957397549948e-05, "loss": 0.5496, "step": 8143 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1117591957507247e-05, "loss": 0.6387, "step": 8144 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1115609895055291e-05, "loss": 0.6793, "step": 8145 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.111362778822246e-05, "loss": 0.6139, "step": 8146 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1111645637087609e-05, "loss": 0.6389, "step": 8147 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.110966344172959e-05, "loss": 0.7008, "step": 8148 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1107681202227258e-05, "loss": 0.6569, "step": 8149 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1105698918659472e-05, "loss": 0.6612, "step": 8150 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1103716591105087e-05, "loss": 0.6213, "step": 8151 }, { "epoch": 1.47, "grad_norm": 0.0, "learning_rate": 1.1101734219642965e-05, "loss": 0.7284, "step": 8152 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1099751804351971e-05, "loss": 0.6916, "step": 8153 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1097769345310963e-05, "loss": 0.5934, "step": 8154 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1095786842598813e-05, "loss": 0.6651, "step": 8155 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1093804296294381e-05, "loss": 0.6668, "step": 8156 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1091821706476544e-05, "loss": 0.5978, "step": 8157 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1089839073224167e-05, "loss": 0.6395, "step": 8158 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1087856396616126e-05, "loss": 0.732, "step": 8159 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1085873676731294e-05, "loss": 0.6498, "step": 8160 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1083890913648548e-05, "loss": 0.57, "step": 8161 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.108190810744676e-05, "loss": 0.5418, "step": 8162 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.107992525820482e-05, "loss": 0.5841, "step": 8163 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1077942366001598e-05, "loss": 0.6316, "step": 8164 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1075959430915984e-05, "loss": 0.6884, "step": 8165 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1073976453026865e-05, "loss": 0.8036, "step": 8166 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1071993432413116e-05, "loss": 0.5951, "step": 8167 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1070010369153635e-05, "loss": 0.6257, "step": 8168 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1068027263327307e-05, "loss": 0.5873, "step": 8169 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1066044115013028e-05, "loss": 0.7607, "step": 8170 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1064060924289686e-05, "loss": 0.5522, "step": 8171 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1062077691236176e-05, "loss": 0.5734, "step": 8172 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1060094415931397e-05, "loss": 0.567, "step": 8173 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.105811109845425e-05, "loss": 0.5855, "step": 8174 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1056127738883624e-05, "loss": 0.713, "step": 8175 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1054144337298432e-05, "loss": 0.5843, "step": 8176 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.105216089377757e-05, "loss": 0.5555, "step": 8177 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1050177408399948e-05, "loss": 0.6732, "step": 8178 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1048193881244467e-05, "loss": 0.6953, "step": 8179 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.104621031239004e-05, "loss": 0.7157, "step": 8180 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1044226701915571e-05, "loss": 0.4964, "step": 8181 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1042243049899978e-05, "loss": 0.598, "step": 8182 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.104025935642217e-05, "loss": 0.627, "step": 8183 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1038275621561063e-05, "loss": 0.6586, "step": 8184 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1036291845395575e-05, "loss": 0.5985, "step": 8185 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.103430802800462e-05, "loss": 0.62, "step": 8186 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1032324169467118e-05, "loss": 0.6839, "step": 8187 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1030340269861994e-05, "loss": 0.5972, "step": 8188 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.102835632926817e-05, "loss": 0.7208, "step": 8189 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1026372347764569e-05, "loss": 0.709, "step": 8190 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1024388325430115e-05, "loss": 0.5905, "step": 8191 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1022404262343741e-05, "loss": 0.6942, "step": 8192 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1020420158584377e-05, "loss": 0.5504, "step": 8193 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1018436014230946e-05, "loss": 0.6732, "step": 8194 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1016451829362386e-05, "loss": 0.6973, "step": 8195 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1014467604057635e-05, "loss": 0.7421, "step": 8196 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.101248333839562e-05, "loss": 0.6643, "step": 8197 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1010499032455288e-05, "loss": 0.5499, "step": 8198 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.100851468631557e-05, "loss": 0.6187, "step": 8199 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1006530300055412e-05, "loss": 0.6215, "step": 8200 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1004545873753754e-05, "loss": 0.5599, "step": 8201 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1002561407489538e-05, "loss": 0.7323, "step": 8202 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.1000576901341714e-05, "loss": 0.5648, "step": 8203 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.0998592355389229e-05, "loss": 0.6967, "step": 8204 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.0996607769711023e-05, "loss": 0.6121, "step": 8205 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.0994623144386059e-05, "loss": 0.667, "step": 8206 }, { "epoch": 1.48, "grad_norm": 0.0, "learning_rate": 1.0992638479493279e-05, "loss": 0.5886, "step": 8207 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0990653775111642e-05, "loss": 0.6508, "step": 8208 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0988669031320097e-05, "loss": 0.7427, "step": 8209 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0986684248197605e-05, "loss": 0.6272, "step": 8210 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0984699425823126e-05, "loss": 0.5723, "step": 8211 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0982714564275618e-05, "loss": 0.6547, "step": 8212 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0980729663634038e-05, "loss": 0.6384, "step": 8213 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0978744723977353e-05, "loss": 0.6512, "step": 8214 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0976759745384524e-05, "loss": 0.7396, "step": 8215 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0974774727934524e-05, "loss": 0.6477, "step": 8216 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0972789671706312e-05, "loss": 0.6802, "step": 8217 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.097080457677886e-05, "loss": 0.6152, "step": 8218 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.096881944323114e-05, "loss": 0.5805, "step": 8219 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0966834271142124e-05, "loss": 0.6219, "step": 8220 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.096484906059078e-05, "loss": 0.5613, "step": 8221 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0962863811656093e-05, "loss": 0.6041, "step": 8222 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.096087852441703e-05, "loss": 0.6573, "step": 8223 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0958893198952576e-05, "loss": 0.6922, "step": 8224 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0956907835341706e-05, "loss": 0.565, "step": 8225 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0954922433663404e-05, "loss": 0.5584, "step": 8226 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0952936993996652e-05, "loss": 0.5829, "step": 8227 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0950951516420435e-05, "loss": 0.6876, "step": 8228 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0948966001013732e-05, "loss": 0.6236, "step": 8229 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0946980447855543e-05, "loss": 0.5895, "step": 8230 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0944994857024846e-05, "loss": 0.6914, "step": 8231 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0943009228600635e-05, "loss": 0.6783, "step": 8232 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0941023562661902e-05, "loss": 0.695, "step": 8233 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0939037859287638e-05, "loss": 0.6943, "step": 8234 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0937052118556841e-05, "loss": 0.6079, "step": 8235 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0935066340548506e-05, "loss": 0.6064, "step": 8236 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0933080525341625e-05, "loss": 0.6914, "step": 8237 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.093109467301521e-05, "loss": 0.6348, "step": 8238 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.092910878364825e-05, "loss": 0.603, "step": 8239 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.092712285731975e-05, "loss": 0.6089, "step": 8240 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0925136894108714e-05, "loss": 0.6989, "step": 8241 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.092315089409415e-05, "loss": 0.7182, "step": 8242 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0921164857355061e-05, "loss": 0.7471, "step": 8243 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0919178783970457e-05, "loss": 0.7056, "step": 8244 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0917192674019345e-05, "loss": 0.6466, "step": 8245 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0915206527580737e-05, "loss": 0.6388, "step": 8246 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0913220344733647e-05, "loss": 0.7561, "step": 8247 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0911234125557084e-05, "loss": 0.6589, "step": 8248 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.090924787013007e-05, "loss": 0.5727, "step": 8249 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0907261578531619e-05, "loss": 0.629, "step": 8250 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0905275250840744e-05, "loss": 0.6353, "step": 8251 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.090328888713647e-05, "loss": 0.6476, "step": 8252 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0901302487497818e-05, "loss": 0.7137, "step": 8253 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0899316052003808e-05, "loss": 0.5861, "step": 8254 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0897329580733467e-05, "loss": 0.6042, "step": 8255 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0895343073765812e-05, "loss": 0.6263, "step": 8256 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0893356531179883e-05, "loss": 0.6472, "step": 8257 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0891369953054698e-05, "loss": 0.7034, "step": 8258 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0889383339469287e-05, "loss": 0.7357, "step": 8259 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0887396690502686e-05, "loss": 0.5549, "step": 8260 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0885410006233923e-05, "loss": 0.5952, "step": 8261 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0883423286742035e-05, "loss": 0.6774, "step": 8262 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 1.0881436532106054e-05, "loss": 0.6353, "step": 8263 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0879449742405015e-05, "loss": 0.6227, "step": 8264 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0877462917717965e-05, "loss": 0.7159, "step": 8265 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0875476058123933e-05, "loss": 0.7302, "step": 8266 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0873489163701963e-05, "loss": 0.6581, "step": 8267 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0871502234531097e-05, "loss": 0.7337, "step": 8268 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0869515270690386e-05, "loss": 0.744, "step": 8269 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.086752827225886e-05, "loss": 0.6655, "step": 8270 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0865541239315576e-05, "loss": 0.6022, "step": 8271 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0863554171939578e-05, "loss": 0.6627, "step": 8272 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0861567070209918e-05, "loss": 0.6156, "step": 8273 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0859579934205642e-05, "loss": 0.6556, "step": 8274 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0857592764005803e-05, "loss": 0.6914, "step": 8275 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0855605559689458e-05, "loss": 0.6513, "step": 8276 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0853618321335655e-05, "loss": 0.681, "step": 8277 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0851631049023453e-05, "loss": 0.7131, "step": 8278 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.084964374283191e-05, "loss": 0.6261, "step": 8279 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0847656402840083e-05, "loss": 0.7407, "step": 8280 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0845669029127032e-05, "loss": 0.5971, "step": 8281 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.084368162177182e-05, "loss": 0.6393, "step": 8282 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0841694180853503e-05, "loss": 0.5498, "step": 8283 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0839706706451157e-05, "loss": 0.653, "step": 8284 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0837719198643834e-05, "loss": 0.6594, "step": 8285 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0835731657510606e-05, "loss": 0.6255, "step": 8286 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0833744083130544e-05, "loss": 0.7152, "step": 8287 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0831756475582713e-05, "loss": 0.6198, "step": 8288 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0829768834946185e-05, "loss": 0.7009, "step": 8289 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.082778116130003e-05, "loss": 0.6018, "step": 8290 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0825793454723325e-05, "loss": 0.5593, "step": 8291 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.082380571529514e-05, "loss": 0.5791, "step": 8292 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0821817943094552e-05, "loss": 0.5999, "step": 8293 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0819830138200638e-05, "loss": 0.6145, "step": 8294 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0817842300692478e-05, "loss": 0.5549, "step": 8295 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.081585443064915e-05, "loss": 0.678, "step": 8296 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0813866528149732e-05, "loss": 0.5974, "step": 8297 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0811878593273313e-05, "loss": 0.5837, "step": 8298 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0809890626098971e-05, "loss": 0.7128, "step": 8299 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0807902626705796e-05, "loss": 0.5898, "step": 8300 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0805914595172867e-05, "loss": 0.5311, "step": 8301 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0803926531579271e-05, "loss": 0.6093, "step": 8302 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0801938436004109e-05, "loss": 0.5677, "step": 8303 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0799950308526457e-05, "loss": 0.7285, "step": 8304 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0797962149225411e-05, "loss": 0.6772, "step": 8305 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0795973958180066e-05, "loss": 0.6409, "step": 8306 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0793985735469512e-05, "loss": 0.6266, "step": 8307 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0791997481172847e-05, "loss": 0.5768, "step": 8308 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0790009195369162e-05, "loss": 0.5894, "step": 8309 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0788020878137561e-05, "loss": 0.639, "step": 8310 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.078603252955714e-05, "loss": 0.6172, "step": 8311 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0784044149706996e-05, "loss": 0.7389, "step": 8312 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.078205573866623e-05, "loss": 0.5967, "step": 8313 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0780067296513954e-05, "loss": 0.6297, "step": 8314 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0778078823329261e-05, "loss": 0.6284, "step": 8315 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0776090319191259e-05, "loss": 0.6267, "step": 8316 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0774101784179054e-05, "loss": 0.5372, "step": 8317 }, { "epoch": 1.5, "grad_norm": 0.0, "learning_rate": 1.0772113218371755e-05, "loss": 0.5961, "step": 8318 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.077012462184847e-05, "loss": 0.5565, "step": 8319 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0768135994688308e-05, "loss": 0.5897, "step": 8320 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0766147336970378e-05, "loss": 0.5893, "step": 8321 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.07641586487738e-05, "loss": 0.5661, "step": 8322 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0762169930177678e-05, "loss": 0.591, "step": 8323 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.076018118126113e-05, "loss": 0.7256, "step": 8324 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0758192402103274e-05, "loss": 0.6089, "step": 8325 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0756203592783223e-05, "loss": 0.5945, "step": 8326 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0754214753380098e-05, "loss": 0.6048, "step": 8327 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.075222588397302e-05, "loss": 0.6483, "step": 8328 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0750236984641108e-05, "loss": 0.7292, "step": 8329 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0748248055463483e-05, "loss": 0.5879, "step": 8330 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0746259096519267e-05, "loss": 0.5281, "step": 8331 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0744270107887587e-05, "loss": 0.6946, "step": 8332 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0742281089647568e-05, "loss": 0.5386, "step": 8333 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0740292041878333e-05, "loss": 0.695, "step": 8334 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0738302964659012e-05, "loss": 0.6423, "step": 8335 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.073631385806874e-05, "loss": 0.624, "step": 8336 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0734324722186636e-05, "loss": 0.735, "step": 8337 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0732335557091838e-05, "loss": 0.6289, "step": 8338 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0730346362863477e-05, "loss": 0.6283, "step": 8339 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0728357139580685e-05, "loss": 0.6631, "step": 8340 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0726367887322602e-05, "loss": 0.6202, "step": 8341 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.072437860616836e-05, "loss": 0.5396, "step": 8342 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.072238929619709e-05, "loss": 0.6987, "step": 8343 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0720399957487943e-05, "loss": 0.667, "step": 8344 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0718410590120048e-05, "loss": 0.6457, "step": 8345 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.071642119417255e-05, "loss": 0.695, "step": 8346 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0714431769724589e-05, "loss": 0.5787, "step": 8347 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.071244231685531e-05, "loss": 0.6217, "step": 8348 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0710452835643855e-05, "loss": 0.7024, "step": 8349 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0708463326169367e-05, "loss": 0.6646, "step": 8350 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0706473788510993e-05, "loss": 0.6629, "step": 8351 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0704484222747886e-05, "loss": 0.5755, "step": 8352 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0702494628959185e-05, "loss": 0.6556, "step": 8353 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.070050500722405e-05, "loss": 0.5694, "step": 8354 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0698515357621622e-05, "loss": 0.6444, "step": 8355 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0696525680231055e-05, "loss": 0.617, "step": 8356 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0694535975131507e-05, "loss": 0.6747, "step": 8357 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0692546242402126e-05, "loss": 0.5937, "step": 8358 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0690556482122067e-05, "loss": 0.6245, "step": 8359 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0688566694370495e-05, "loss": 0.7348, "step": 8360 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0686576879226555e-05, "loss": 0.762, "step": 8361 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.068458703676941e-05, "loss": 0.6932, "step": 8362 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0682597167078223e-05, "loss": 0.6445, "step": 8363 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0680607270232148e-05, "loss": 0.6615, "step": 8364 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0678617346310352e-05, "loss": 0.5723, "step": 8365 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0676627395391994e-05, "loss": 0.6832, "step": 8366 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0674637417556242e-05, "loss": 0.5954, "step": 8367 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0672647412882255e-05, "loss": 0.7177, "step": 8368 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0670657381449202e-05, "loss": 0.5461, "step": 8369 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.066866732333625e-05, "loss": 0.6816, "step": 8370 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0666677238622567e-05, "loss": 0.6246, "step": 8371 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.066468712738732e-05, "loss": 0.642, "step": 8372 }, { "epoch": 1.51, "grad_norm": 0.0, "learning_rate": 1.0662696989709682e-05, "loss": 0.7296, "step": 8373 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.066070682566882e-05, "loss": 0.6792, "step": 8374 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.065871663534391e-05, "loss": 0.5822, "step": 8375 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0656726418814125e-05, "loss": 0.4741, "step": 8376 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0654736176158638e-05, "loss": 0.5821, "step": 8377 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0652745907456624e-05, "loss": 0.7213, "step": 8378 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0650755612787258e-05, "loss": 0.6106, "step": 8379 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0648765292229719e-05, "loss": 0.7234, "step": 8380 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0646774945863185e-05, "loss": 0.769, "step": 8381 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.064478457376684e-05, "loss": 0.5299, "step": 8382 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0642794176019854e-05, "loss": 0.5706, "step": 8383 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0640803752701419e-05, "loss": 0.66, "step": 8384 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0638813303890711e-05, "loss": 0.582, "step": 8385 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0636822829666915e-05, "loss": 0.6884, "step": 8386 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0634832330109217e-05, "loss": 0.6464, "step": 8387 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0632841805296803e-05, "loss": 0.5718, "step": 8388 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0630851255308854e-05, "loss": 0.5915, "step": 8389 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0628860680224563e-05, "loss": 0.6667, "step": 8390 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0626870080123118e-05, "loss": 0.6211, "step": 8391 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0624879455083706e-05, "loss": 0.6656, "step": 8392 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0622888805185518e-05, "loss": 0.5971, "step": 8393 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0620898130507748e-05, "loss": 0.5597, "step": 8394 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0618907431129589e-05, "loss": 0.5724, "step": 8395 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0616916707130227e-05, "loss": 0.6779, "step": 8396 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0614925958588864e-05, "loss": 0.6901, "step": 8397 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0612935185584694e-05, "loss": 0.6361, "step": 8398 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0610944388196908e-05, "loss": 0.5836, "step": 8399 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0608953566504711e-05, "loss": 0.881, "step": 8400 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0606962720587301e-05, "loss": 0.6883, "step": 8401 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0604971850523868e-05, "loss": 0.6688, "step": 8402 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0602980956393622e-05, "loss": 0.6796, "step": 8403 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.060099003827576e-05, "loss": 0.5953, "step": 8404 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0598999096249486e-05, "loss": 0.6408, "step": 8405 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0597008130393999e-05, "loss": 0.6153, "step": 8406 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0595017140788508e-05, "loss": 0.5076, "step": 8407 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0593026127512214e-05, "loss": 0.6115, "step": 8408 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0591035090644328e-05, "loss": 0.651, "step": 8409 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.058904403026405e-05, "loss": 0.641, "step": 8410 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0587052946450593e-05, "loss": 0.6504, "step": 8411 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0585061839283165e-05, "loss": 0.6225, "step": 8412 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0583070708840972e-05, "loss": 0.6064, "step": 8413 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0581079555203231e-05, "loss": 0.5745, "step": 8414 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0579088378449148e-05, "loss": 0.7241, "step": 8415 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0577097178657935e-05, "loss": 0.7024, "step": 8416 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.057510595590881e-05, "loss": 0.5996, "step": 8417 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0573114710280984e-05, "loss": 0.6584, "step": 8418 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0571123441853674e-05, "loss": 0.6291, "step": 8419 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0569132150706099e-05, "loss": 0.5901, "step": 8420 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0567140836917465e-05, "loss": 0.67, "step": 8421 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0565149500567e-05, "loss": 0.5791, "step": 8422 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0563158141733917e-05, "loss": 0.66, "step": 8423 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0561166760497442e-05, "loss": 0.5295, "step": 8424 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0559175356936793e-05, "loss": 0.6482, "step": 8425 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0557183931131186e-05, "loss": 0.5913, "step": 8426 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0555192483159848e-05, "loss": 0.6757, "step": 8427 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0553201013102008e-05, "loss": 0.6087, "step": 8428 }, { "epoch": 1.52, "grad_norm": 0.0, "learning_rate": 1.0551209521036877e-05, "loss": 0.6341, "step": 8429 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.054921800704369e-05, "loss": 0.6973, "step": 8430 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.054722647120167e-05, "loss": 0.6802, "step": 8431 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0545234913590043e-05, "loss": 0.6545, "step": 8432 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0543243334288038e-05, "loss": 0.656, "step": 8433 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0541251733374883e-05, "loss": 0.6511, "step": 8434 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0539260110929805e-05, "loss": 0.8373, "step": 8435 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0537268467032036e-05, "loss": 0.5999, "step": 8436 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0535276801760805e-05, "loss": 0.6871, "step": 8437 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0533285115195351e-05, "loss": 0.6714, "step": 8438 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.05312934074149e-05, "loss": 0.6206, "step": 8439 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0529301678498685e-05, "loss": 0.6245, "step": 8440 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0527309928525944e-05, "loss": 0.65, "step": 8441 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.052531815757591e-05, "loss": 0.6031, "step": 8442 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.052332636572782e-05, "loss": 0.6713, "step": 8443 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0521334553060912e-05, "loss": 0.7096, "step": 8444 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0519342719654418e-05, "loss": 0.6366, "step": 8445 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0517350865587584e-05, "loss": 0.5952, "step": 8446 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0515358990939648e-05, "loss": 0.6558, "step": 8447 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0513367095789846e-05, "loss": 0.629, "step": 8448 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0511375180217422e-05, "loss": 0.6162, "step": 8449 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0509383244301619e-05, "loss": 0.7121, "step": 8450 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0507391288121675e-05, "loss": 0.5879, "step": 8451 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.050539931175684e-05, "loss": 0.6941, "step": 8452 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0503407315286352e-05, "loss": 0.6331, "step": 8453 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.050141529878946e-05, "loss": 0.6422, "step": 8454 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0499423262345407e-05, "loss": 0.748, "step": 8455 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.049743120603344e-05, "loss": 0.6072, "step": 8456 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0495439129932807e-05, "loss": 0.6684, "step": 8457 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0493447034122762e-05, "loss": 0.5758, "step": 8458 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0491454918682545e-05, "loss": 0.5582, "step": 8459 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0489462783691408e-05, "loss": 0.6432, "step": 8460 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0487470629228607e-05, "loss": 0.5745, "step": 8461 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0485478455373386e-05, "loss": 0.6344, "step": 8462 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0483486262205002e-05, "loss": 0.6351, "step": 8463 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0481494049802703e-05, "loss": 0.7478, "step": 8464 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.047950181824575e-05, "loss": 0.6657, "step": 8465 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0477509567613394e-05, "loss": 0.568, "step": 8466 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0475517297984884e-05, "loss": 0.5665, "step": 8467 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0473525009439485e-05, "loss": 0.7512, "step": 8468 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0471532702056449e-05, "loss": 0.5806, "step": 8469 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0469540375915034e-05, "loss": 0.5524, "step": 8470 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.04675480310945e-05, "loss": 0.6318, "step": 8471 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.04655556676741e-05, "loss": 0.5205, "step": 8472 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0463563285733107e-05, "loss": 0.6635, "step": 8473 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0461570885350767e-05, "loss": 0.6133, "step": 8474 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0459578466606345e-05, "loss": 0.6876, "step": 8475 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0457586029579105e-05, "loss": 0.5814, "step": 8476 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0455593574348311e-05, "loss": 0.6511, "step": 8477 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0453601100993221e-05, "loss": 0.6589, "step": 8478 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0451608609593105e-05, "loss": 0.595, "step": 8479 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0449616100227223e-05, "loss": 0.742, "step": 8480 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0447623572974844e-05, "loss": 0.5672, "step": 8481 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0445631027915231e-05, "loss": 0.5995, "step": 8482 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.044363846512765e-05, "loss": 0.6422, "step": 8483 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 1.0441645884691373e-05, "loss": 0.5912, "step": 8484 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.043965328668567e-05, "loss": 0.651, "step": 8485 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0437660671189799e-05, "loss": 0.6861, "step": 8486 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0435668038283043e-05, "loss": 0.652, "step": 8487 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0433675388044665e-05, "loss": 0.54, "step": 8488 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0431682720553935e-05, "loss": 0.6856, "step": 8489 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0429690035890128e-05, "loss": 0.5332, "step": 8490 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0427697334132512e-05, "loss": 0.6046, "step": 8491 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0425704615360368e-05, "loss": 0.685, "step": 8492 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0423711879652965e-05, "loss": 0.7208, "step": 8493 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0421719127089573e-05, "loss": 0.5919, "step": 8494 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0419726357749475e-05, "loss": 0.6615, "step": 8495 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0417733571711944e-05, "loss": 0.6264, "step": 8496 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0415740769056255e-05, "loss": 0.7351, "step": 8497 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0413747949861685e-05, "loss": 0.5906, "step": 8498 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0411755114207513e-05, "loss": 0.6088, "step": 8499 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.040976226217302e-05, "loss": 0.6532, "step": 8500 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0407769393837478e-05, "loss": 0.5324, "step": 8501 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0405776509280173e-05, "loss": 0.5519, "step": 8502 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0403783608580386e-05, "loss": 0.5503, "step": 8503 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0401790691817392e-05, "loss": 0.6222, "step": 8504 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0399797759070476e-05, "loss": 0.6554, "step": 8505 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0397804810418921e-05, "loss": 0.6906, "step": 8506 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.039581184594201e-05, "loss": 0.6609, "step": 8507 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0393818865719026e-05, "loss": 0.7834, "step": 8508 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0391825869829251e-05, "loss": 0.5464, "step": 8509 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.038983285835197e-05, "loss": 0.6271, "step": 8510 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0387839831366476e-05, "loss": 0.6848, "step": 8511 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0385846788952046e-05, "loss": 0.6885, "step": 8512 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0383853731187965e-05, "loss": 0.6276, "step": 8513 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0381860658153531e-05, "loss": 0.6557, "step": 8514 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0379867569928024e-05, "loss": 0.6764, "step": 8515 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0377874466590733e-05, "loss": 0.5971, "step": 8516 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0375881348220952e-05, "loss": 0.6436, "step": 8517 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0373888214897964e-05, "loss": 0.732, "step": 8518 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0371895066701066e-05, "loss": 0.6628, "step": 8519 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0369901903709541e-05, "loss": 0.5779, "step": 8520 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0367908726002684e-05, "loss": 0.6718, "step": 8521 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0365915533659791e-05, "loss": 0.7294, "step": 8522 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0363922326760152e-05, "loss": 0.6863, "step": 8523 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0361929105383054e-05, "loss": 0.7045, "step": 8524 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.03599358696078e-05, "loss": 0.7229, "step": 8525 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0357942619513678e-05, "loss": 0.7548, "step": 8526 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0355949355179988e-05, "loss": 0.5909, "step": 8527 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0353956076686025e-05, "loss": 0.6316, "step": 8528 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0351962784111076e-05, "loss": 0.659, "step": 8529 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0349969477534453e-05, "loss": 0.5993, "step": 8530 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.034797615703544e-05, "loss": 0.5724, "step": 8531 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0345982822693337e-05, "loss": 0.6995, "step": 8532 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0343989474587449e-05, "loss": 0.608, "step": 8533 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0341996112797068e-05, "loss": 0.6063, "step": 8534 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0340002737401496e-05, "loss": 0.5708, "step": 8535 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0338009348480033e-05, "loss": 0.5563, "step": 8536 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0336015946111977e-05, "loss": 0.6553, "step": 8537 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0334022530376636e-05, "loss": 0.6944, "step": 8538 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 1.0332029101353304e-05, "loss": 0.6491, "step": 8539 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0330035659121283e-05, "loss": 0.6995, "step": 8540 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0328042203759883e-05, "loss": 0.7229, "step": 8541 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0326048735348398e-05, "loss": 0.5487, "step": 8542 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0324055253966136e-05, "loss": 0.5708, "step": 8543 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0322061759692403e-05, "loss": 0.7648, "step": 8544 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0320068252606501e-05, "loss": 0.6001, "step": 8545 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0318074732787734e-05, "loss": 0.5365, "step": 8546 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0316081200315414e-05, "loss": 0.6477, "step": 8547 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0314087655268835e-05, "loss": 0.6361, "step": 8548 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0312094097727317e-05, "loss": 0.6215, "step": 8549 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.031010052777016e-05, "loss": 0.6131, "step": 8550 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0308106945476668e-05, "loss": 0.5409, "step": 8551 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0306113350926158e-05, "loss": 0.6242, "step": 8552 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0304119744197935e-05, "loss": 0.6338, "step": 8553 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0302126125371309e-05, "loss": 0.666, "step": 8554 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0300132494525586e-05, "loss": 0.5971, "step": 8555 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0298138851740077e-05, "loss": 0.5572, "step": 8556 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0296145197094093e-05, "loss": 0.5671, "step": 8557 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.029415153066695e-05, "loss": 0.5816, "step": 8558 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.029215785253795e-05, "loss": 0.6563, "step": 8559 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0290164162786415e-05, "loss": 0.6928, "step": 8560 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.028817046149165e-05, "loss": 0.6248, "step": 8561 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.028617674873297e-05, "loss": 0.6589, "step": 8562 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0284183024589689e-05, "loss": 0.6877, "step": 8563 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0282189289141122e-05, "loss": 0.6942, "step": 8564 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.028019554246658e-05, "loss": 0.5977, "step": 8565 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0278201784645381e-05, "loss": 0.6311, "step": 8566 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0276208015756834e-05, "loss": 0.6321, "step": 8567 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0274214235880267e-05, "loss": 0.6766, "step": 8568 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0272220445094983e-05, "loss": 0.6062, "step": 8569 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0270226643480301e-05, "loss": 0.5772, "step": 8570 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0268232831115543e-05, "loss": 0.554, "step": 8571 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0266239008080024e-05, "loss": 0.6177, "step": 8572 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.026424517445306e-05, "loss": 0.595, "step": 8573 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.026225133031397e-05, "loss": 0.7452, "step": 8574 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0260257475742075e-05, "loss": 0.5929, "step": 8575 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.025826361081669e-05, "loss": 0.591, "step": 8576 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0256269735617138e-05, "loss": 0.6432, "step": 8577 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0254275850222732e-05, "loss": 0.5683, "step": 8578 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0252281954712803e-05, "loss": 0.5789, "step": 8579 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.025028804916666e-05, "loss": 0.6103, "step": 8580 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0248294133663634e-05, "loss": 0.6589, "step": 8581 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0246300208283042e-05, "loss": 0.6808, "step": 8582 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0244306273104203e-05, "loss": 0.6845, "step": 8583 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0242312328206446e-05, "loss": 0.587, "step": 8584 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0240318373669085e-05, "loss": 0.5538, "step": 8585 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0238324409571447e-05, "loss": 0.6623, "step": 8586 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.023633043599286e-05, "loss": 0.7021, "step": 8587 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.023433645301264e-05, "loss": 0.7389, "step": 8588 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0232342460710116e-05, "loss": 0.5974, "step": 8589 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0230348459164611e-05, "loss": 0.739, "step": 8590 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.022835444845545e-05, "loss": 0.7051, "step": 8591 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.022636042866196e-05, "loss": 0.6442, "step": 8592 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0224366399863462e-05, "loss": 0.6203, "step": 8593 }, { "epoch": 1.55, "grad_norm": 0.0, "learning_rate": 1.0222372362139283e-05, "loss": 0.5387, "step": 8594 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0220378315568751e-05, "loss": 0.5884, "step": 8595 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0218384260231193e-05, "loss": 0.7086, "step": 8596 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0216390196205931e-05, "loss": 0.6863, "step": 8597 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0214396123572302e-05, "loss": 0.5273, "step": 8598 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0212402042409626e-05, "loss": 0.654, "step": 8599 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0210407952797232e-05, "loss": 0.4796, "step": 8600 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.020841385481445e-05, "loss": 0.6743, "step": 8601 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0206419748540608e-05, "loss": 0.6748, "step": 8602 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0204425634055036e-05, "loss": 0.5747, "step": 8603 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.020243151143706e-05, "loss": 0.588, "step": 8604 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.020043738076601e-05, "loss": 0.5421, "step": 8605 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0198443242121224e-05, "loss": 0.6088, "step": 8606 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0196449095582017e-05, "loss": 0.6151, "step": 8607 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0194454941227734e-05, "loss": 0.6061, "step": 8608 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0192460779137703e-05, "loss": 0.6953, "step": 8609 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0190466609391246e-05, "loss": 0.7276, "step": 8610 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0188472432067704e-05, "loss": 0.697, "step": 8611 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0186478247246405e-05, "loss": 0.7906, "step": 8612 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0184484055006686e-05, "loss": 0.5906, "step": 8613 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.018248985542787e-05, "loss": 0.6322, "step": 8614 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0180495648589297e-05, "loss": 0.6418, "step": 8615 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0178501434570296e-05, "loss": 0.6767, "step": 8616 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0176507213450206e-05, "loss": 0.6843, "step": 8617 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0174512985308354e-05, "loss": 0.7812, "step": 8618 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0172518750224077e-05, "loss": 0.6861, "step": 8619 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0170524508276707e-05, "loss": 0.6475, "step": 8620 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0168530259545582e-05, "loss": 0.7079, "step": 8621 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0166536004110034e-05, "loss": 0.7686, "step": 8622 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0164541742049398e-05, "loss": 0.5609, "step": 8623 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.016254747344301e-05, "loss": 0.7051, "step": 8624 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0160553198370207e-05, "loss": 0.8362, "step": 8625 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0158558916910319e-05, "loss": 0.684, "step": 8626 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0156564629142688e-05, "loss": 0.611, "step": 8627 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.015457033514665e-05, "loss": 0.5961, "step": 8628 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0152576035001534e-05, "loss": 0.5795, "step": 8629 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0150581728786686e-05, "loss": 0.633, "step": 8630 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0148587416581436e-05, "loss": 0.5759, "step": 8631 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0146593098465127e-05, "loss": 0.6555, "step": 8632 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0144598774517092e-05, "loss": 0.5979, "step": 8633 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0142604444816668e-05, "loss": 0.6559, "step": 8634 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0140610109443196e-05, "loss": 0.5676, "step": 8635 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0138615768476018e-05, "loss": 0.6375, "step": 8636 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0136621421994462e-05, "loss": 0.6305, "step": 8637 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0134627070077872e-05, "loss": 0.6495, "step": 8638 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0132632712805589e-05, "loss": 0.5985, "step": 8639 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0130638350256948e-05, "loss": 0.5851, "step": 8640 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0128643982511287e-05, "loss": 0.6532, "step": 8641 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0126649609647952e-05, "loss": 0.6039, "step": 8642 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0124655231746275e-05, "loss": 0.7029, "step": 8643 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0122660848885604e-05, "loss": 0.5009, "step": 8644 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.012066646114527e-05, "loss": 0.6327, "step": 8645 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.011867206860462e-05, "loss": 0.5785, "step": 8646 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0116677671342994e-05, "loss": 0.5955, "step": 8647 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0114683269439725e-05, "loss": 0.7119, "step": 8648 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0112688862974161e-05, "loss": 0.4938, "step": 8649 }, { "epoch": 1.56, "grad_norm": 0.0, "learning_rate": 1.0110694452025642e-05, "loss": 0.6564, "step": 8650 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0108700036673506e-05, "loss": 0.5539, "step": 8651 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.01067056169971e-05, "loss": 0.6485, "step": 8652 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0104711193075756e-05, "loss": 0.5991, "step": 8653 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0102716764988827e-05, "loss": 0.6707, "step": 8654 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0100722332815649e-05, "loss": 0.558, "step": 8655 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.009872789663556e-05, "loss": 0.666, "step": 8656 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0096733456527907e-05, "loss": 0.6085, "step": 8657 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0094739012572034e-05, "loss": 0.692, "step": 8658 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0092744564847278e-05, "loss": 0.6499, "step": 8659 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0090750113432987e-05, "loss": 0.6133, "step": 8660 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0088755658408501e-05, "loss": 0.6755, "step": 8661 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0086761199853161e-05, "loss": 0.6331, "step": 8662 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0084766737846312e-05, "loss": 0.6757, "step": 8663 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0082772272467297e-05, "loss": 0.6105, "step": 8664 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0080777803795458e-05, "loss": 0.6015, "step": 8665 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0078783331910144e-05, "loss": 0.5476, "step": 8666 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0076788856890692e-05, "loss": 0.6326, "step": 8667 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0074794378816445e-05, "loss": 0.6194, "step": 8668 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0072799897766751e-05, "loss": 0.5639, "step": 8669 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0070805413820954e-05, "loss": 0.7316, "step": 8670 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0068810927058397e-05, "loss": 0.5505, "step": 8671 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0066816437558418e-05, "loss": 0.6574, "step": 8672 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0064821945400372e-05, "loss": 0.6473, "step": 8673 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0062827450663597e-05, "loss": 0.5404, "step": 8674 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0060832953427437e-05, "loss": 0.7252, "step": 8675 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0058838453771239e-05, "loss": 0.6131, "step": 8676 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0056843951774347e-05, "loss": 0.5841, "step": 8677 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0054849447516103e-05, "loss": 0.6245, "step": 8678 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0052854941075855e-05, "loss": 0.7556, "step": 8679 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0050860432532946e-05, "loss": 0.601, "step": 8680 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0048865921966725e-05, "loss": 0.5542, "step": 8681 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0046871409456534e-05, "loss": 0.6167, "step": 8682 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0044876895081711e-05, "loss": 0.706, "step": 8683 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0042882378921614e-05, "loss": 0.6133, "step": 8684 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0040887861055585e-05, "loss": 0.6894, "step": 8685 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0038893341562964e-05, "loss": 0.6376, "step": 8686 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0036898820523098e-05, "loss": 0.6133, "step": 8687 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0034904298015333e-05, "loss": 0.6398, "step": 8688 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0032909774119018e-05, "loss": 0.6602, "step": 8689 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0030915248913495e-05, "loss": 0.6446, "step": 8690 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0028920722478109e-05, "loss": 0.659, "step": 8691 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0026926194892208e-05, "loss": 0.6286, "step": 8692 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0024931666235138e-05, "loss": 0.6803, "step": 8693 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0022937136586241e-05, "loss": 0.6517, "step": 8694 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0020942606024868e-05, "loss": 0.6291, "step": 8695 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0018948074630362e-05, "loss": 0.6103, "step": 8696 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0016953542482069e-05, "loss": 0.684, "step": 8697 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0014959009659333e-05, "loss": 0.6977, "step": 8698 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0012964476241502e-05, "loss": 0.6426, "step": 8699 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0010969942307926e-05, "loss": 0.6222, "step": 8700 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0008975407937944e-05, "loss": 0.6711, "step": 8701 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0006980873210903e-05, "loss": 0.7158, "step": 8702 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0004986338206153e-05, "loss": 0.6307, "step": 8703 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0002991803003039e-05, "loss": 0.645, "step": 8704 }, { "epoch": 1.57, "grad_norm": 0.0, "learning_rate": 1.0000997267680905e-05, "loss": 0.5846, "step": 8705 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.999002732319098e-06, "loss": 0.596, "step": 8706 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.997008196996964e-06, "loss": 0.7956, "step": 8707 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.995013661793847e-06, "loss": 0.5969, "step": 8708 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.993019126789097e-06, "loss": 0.665, "step": 8709 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.991024592062061e-06, "loss": 0.6821, "step": 8710 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.989030057692079e-06, "loss": 0.6349, "step": 8711 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.987035523758502e-06, "loss": 0.5513, "step": 8712 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.985040990340669e-06, "loss": 0.6561, "step": 8713 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.983046457517935e-06, "loss": 0.747, "step": 8714 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.981051925369642e-06, "loss": 0.5665, "step": 8715 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.979057393975134e-06, "loss": 0.5404, "step": 8716 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.977062863413764e-06, "loss": 0.661, "step": 8717 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.975068333764865e-06, "loss": 0.6347, "step": 8718 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.973073805107795e-06, "loss": 0.6721, "step": 8719 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.971079277521895e-06, "loss": 0.5943, "step": 8720 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.969084751086507e-06, "loss": 0.6089, "step": 8721 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.967090225880984e-06, "loss": 0.7244, "step": 8722 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.965095701984669e-06, "loss": 0.6212, "step": 8723 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.963101179476902e-06, "loss": 0.5321, "step": 8724 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.961106658437043e-06, "loss": 0.5468, "step": 8725 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.959112138944419e-06, "loss": 0.6257, "step": 8726 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.957117621078387e-06, "loss": 0.6237, "step": 8727 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.95512310491829e-06, "loss": 0.6945, "step": 8728 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.953128590543471e-06, "loss": 0.7391, "step": 8729 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.951134078033278e-06, "loss": 0.7172, "step": 8730 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.949139567467054e-06, "loss": 0.5906, "step": 8731 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.947145058924145e-06, "loss": 0.7518, "step": 8732 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.945150552483902e-06, "loss": 0.5934, "step": 8733 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.943156048225658e-06, "loss": 0.6347, "step": 8734 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.941161546228765e-06, "loss": 0.6697, "step": 8735 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.939167046572566e-06, "loss": 0.6384, "step": 8736 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.937172549336405e-06, "loss": 0.6892, "step": 8737 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.93517805459963e-06, "loss": 0.5067, "step": 8738 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.933183562441581e-06, "loss": 0.6463, "step": 8739 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.93118907294161e-06, "loss": 0.6804, "step": 8740 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.929194586179051e-06, "loss": 0.6839, "step": 8741 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.92720010223325e-06, "loss": 0.6463, "step": 8742 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.925205621183557e-06, "loss": 0.6896, "step": 8743 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.923211143109312e-06, "loss": 0.5652, "step": 8744 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.921216668089857e-06, "loss": 0.5437, "step": 8745 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.91922219620454e-06, "loss": 0.6368, "step": 8746 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.917227727532705e-06, "loss": 0.7085, "step": 8747 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.915233262153692e-06, "loss": 0.5721, "step": 8748 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.913238800146844e-06, "loss": 0.5927, "step": 8749 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.911244341591502e-06, "loss": 0.6256, "step": 8750 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.909249886567015e-06, "loss": 0.6252, "step": 8751 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.907255435152724e-06, "loss": 0.6429, "step": 8752 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.90526098742797e-06, "loss": 0.5617, "step": 8753 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.903266543472093e-06, "loss": 0.5737, "step": 8754 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.901272103364443e-06, "loss": 0.5374, "step": 8755 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.899277667184357e-06, "loss": 0.5884, "step": 8756 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.897283235011176e-06, "loss": 0.555, "step": 8757 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.895288806924245e-06, "loss": 0.7318, "step": 8758 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.893294383002904e-06, "loss": 0.592, "step": 8759 }, { "epoch": 1.58, "grad_norm": 0.0, "learning_rate": 9.891299963326497e-06, "loss": 0.7072, "step": 8760 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.889305547974361e-06, "loss": 0.5873, "step": 8761 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.88731113702584e-06, "loss": 0.5965, "step": 8762 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.885316730560281e-06, "loss": 0.5923, "step": 8763 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.883322328657013e-06, "loss": 0.6613, "step": 8764 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.881327931395384e-06, "loss": 0.6322, "step": 8765 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.879333538854733e-06, "loss": 0.6588, "step": 8766 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.877339151114399e-06, "loss": 0.5924, "step": 8767 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.875344768253725e-06, "loss": 0.6045, "step": 8768 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.873350390352051e-06, "loss": 0.644, "step": 8769 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.871356017488716e-06, "loss": 0.7264, "step": 8770 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.869361649743059e-06, "loss": 0.6181, "step": 8771 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.867367287194416e-06, "loss": 0.6425, "step": 8772 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.865372929922132e-06, "loss": 0.5894, "step": 8773 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.863378578005542e-06, "loss": 0.6232, "step": 8774 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.861384231523984e-06, "loss": 0.6293, "step": 8775 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.859389890556803e-06, "loss": 0.6678, "step": 8776 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.857395555183332e-06, "loss": 0.707, "step": 8777 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.855401225482913e-06, "loss": 0.6354, "step": 8778 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.853406901534878e-06, "loss": 0.6723, "step": 8779 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.851412583418565e-06, "loss": 0.6269, "step": 8780 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.849418271213318e-06, "loss": 0.6648, "step": 8781 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.847423964998468e-06, "loss": 0.6024, "step": 8782 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.845429664853353e-06, "loss": 0.6793, "step": 8783 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.843435370857313e-06, "loss": 0.7598, "step": 8784 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.841441083089681e-06, "loss": 0.5697, "step": 8785 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.839446801629798e-06, "loss": 0.6042, "step": 8786 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.837452526556994e-06, "loss": 0.6138, "step": 8787 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.835458257950604e-06, "loss": 0.565, "step": 8788 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.833463995889967e-06, "loss": 0.5195, "step": 8789 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.83146974045442e-06, "loss": 0.6278, "step": 8790 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.829475491723293e-06, "loss": 0.595, "step": 8791 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.827481249775925e-06, "loss": 0.756, "step": 8792 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.82548701469165e-06, "loss": 0.5937, "step": 8793 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.823492786549799e-06, "loss": 0.7309, "step": 8794 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.821498565429707e-06, "loss": 0.6809, "step": 8795 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.819504351410706e-06, "loss": 0.651, "step": 8796 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.817510144572133e-06, "loss": 0.5068, "step": 8797 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.81551594499332e-06, "loss": 0.5397, "step": 8798 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.813521752753597e-06, "loss": 0.6683, "step": 8799 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.811527567932296e-06, "loss": 0.6213, "step": 8800 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.809533390608756e-06, "loss": 0.659, "step": 8801 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.807539220862303e-06, "loss": 0.5915, "step": 8802 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.80554505877227e-06, "loss": 0.6904, "step": 8803 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.803550904417985e-06, "loss": 0.7223, "step": 8804 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.801556757878781e-06, "loss": 0.635, "step": 8805 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.799562619233991e-06, "loss": 0.5853, "step": 8806 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.797568488562942e-06, "loss": 0.5405, "step": 8807 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.79557436594497e-06, "loss": 0.5712, "step": 8808 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.793580251459397e-06, "loss": 0.6624, "step": 8809 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.791586145185551e-06, "loss": 0.6313, "step": 8810 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.789592047202771e-06, "loss": 0.6444, "step": 8811 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.787597957590377e-06, "loss": 0.5813, "step": 8812 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.7856038764277e-06, "loss": 0.6433, "step": 8813 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.783609803794069e-06, "loss": 0.5487, "step": 8814 }, { "epoch": 1.59, "grad_norm": 0.0, "learning_rate": 9.781615739768809e-06, "loss": 0.7438, "step": 8815 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.779621684431254e-06, "loss": 0.571, "step": 8816 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.777627637860722e-06, "loss": 0.6153, "step": 8817 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.775633600136543e-06, "loss": 0.5613, "step": 8818 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.773639571338044e-06, "loss": 0.7255, "step": 8819 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.771645551544551e-06, "loss": 0.595, "step": 8820 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.769651540835389e-06, "loss": 0.7088, "step": 8821 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.767657539289883e-06, "loss": 0.6627, "step": 8822 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.765663546987361e-06, "loss": 0.6852, "step": 8823 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.763669564007143e-06, "loss": 0.6152, "step": 8824 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.761675590428554e-06, "loss": 0.5639, "step": 8825 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.759681626330917e-06, "loss": 0.6923, "step": 8826 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.75768767179356e-06, "loss": 0.6441, "step": 8827 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.755693726895799e-06, "loss": 0.5506, "step": 8828 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.75369979171696e-06, "loss": 0.6022, "step": 8829 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.751705866336366e-06, "loss": 0.6825, "step": 8830 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.749711950833341e-06, "loss": 0.7734, "step": 8831 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.747718045287202e-06, "loss": 0.6086, "step": 8832 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.74572414977727e-06, "loss": 0.5983, "step": 8833 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.743730264382866e-06, "loss": 0.6336, "step": 8834 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.741736389183313e-06, "loss": 0.5655, "step": 8835 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.739742524257928e-06, "loss": 0.6795, "step": 8836 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.737748669686028e-06, "loss": 0.6908, "step": 8837 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.73575482554694e-06, "loss": 0.626, "step": 8838 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.73376099191998e-06, "loss": 0.678, "step": 8839 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.73176716888446e-06, "loss": 0.6165, "step": 8840 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.729773356519702e-06, "loss": 0.5307, "step": 8841 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.72777955490502e-06, "loss": 0.5557, "step": 8842 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.725785764119738e-06, "loss": 0.678, "step": 8843 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.723791984243164e-06, "loss": 0.6389, "step": 8844 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.721798215354622e-06, "loss": 0.6132, "step": 8845 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.719804457533424e-06, "loss": 0.6156, "step": 8846 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.71781071085888e-06, "loss": 0.6654, "step": 8847 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.715816975410313e-06, "loss": 0.6979, "step": 8848 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.713823251267033e-06, "loss": 0.6222, "step": 8849 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.711829538508354e-06, "loss": 0.572, "step": 8850 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.709835837213586e-06, "loss": 0.6955, "step": 8851 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.70784214746205e-06, "loss": 0.5549, "step": 8852 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.705848469333054e-06, "loss": 0.6679, "step": 8853 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.70385480290591e-06, "loss": 0.5784, "step": 8854 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.701861148259928e-06, "loss": 0.7602, "step": 8855 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.69986750547442e-06, "loss": 0.6227, "step": 8856 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.697873874628696e-06, "loss": 0.6559, "step": 8857 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.695880255802066e-06, "loss": 0.6394, "step": 8858 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.693886649073842e-06, "loss": 0.7086, "step": 8859 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.691893054523332e-06, "loss": 0.6602, "step": 8860 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.689899472229846e-06, "loss": 0.664, "step": 8861 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.687905902272687e-06, "loss": 0.7597, "step": 8862 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.685912344731167e-06, "loss": 0.6849, "step": 8863 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.683918799684591e-06, "loss": 0.6768, "step": 8864 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.681925267212267e-06, "loss": 0.5976, "step": 8865 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.679931747393502e-06, "loss": 0.6323, "step": 8866 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.677938240307597e-06, "loss": 0.6335, "step": 8867 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.675944746033864e-06, "loss": 0.8797, "step": 8868 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.673951264651606e-06, "loss": 0.7282, "step": 8869 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.671957796240123e-06, "loss": 0.6253, "step": 8870 }, { "epoch": 1.6, "grad_norm": 0.0, "learning_rate": 9.669964340878718e-06, "loss": 0.6459, "step": 8871 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.6679708986467e-06, "loss": 0.7258, "step": 8872 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.665977469623366e-06, "loss": 0.5758, "step": 8873 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.663984053888023e-06, "loss": 0.6631, "step": 8874 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.661990651519966e-06, "loss": 0.5029, "step": 8875 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.659997262598509e-06, "loss": 0.6472, "step": 8876 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.658003887202935e-06, "loss": 0.5804, "step": 8877 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.656010525412556e-06, "loss": 0.524, "step": 8878 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.654017177306667e-06, "loss": 0.6407, "step": 8879 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.652023842964563e-06, "loss": 0.6432, "step": 8880 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.65003052246555e-06, "loss": 0.6859, "step": 8881 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.648037215888923e-06, "loss": 0.605, "step": 8882 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.646043923313976e-06, "loss": 0.5638, "step": 8883 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.644050644820016e-06, "loss": 0.5596, "step": 8884 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.642057380486323e-06, "loss": 0.5676, "step": 8885 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.640064130392202e-06, "loss": 0.647, "step": 8886 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.638070894616949e-06, "loss": 0.6688, "step": 8887 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.636077673239852e-06, "loss": 0.633, "step": 8888 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.63408446634021e-06, "loss": 0.6958, "step": 8889 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.632091273997317e-06, "loss": 0.6174, "step": 8890 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.630098096290462e-06, "loss": 0.7042, "step": 8891 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.628104933298941e-06, "loss": 0.5695, "step": 8892 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.626111785102038e-06, "loss": 0.6445, "step": 8893 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.62411865177905e-06, "loss": 0.7733, "step": 8894 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.622125533409268e-06, "loss": 0.5667, "step": 8895 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.620132430071979e-06, "loss": 0.5661, "step": 8896 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.61813934184647e-06, "loss": 0.562, "step": 8897 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.616146268812035e-06, "loss": 0.6389, "step": 8898 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.61415321104796e-06, "loss": 0.615, "step": 8899 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.612160168633531e-06, "loss": 0.6567, "step": 8900 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.610167141648034e-06, "loss": 0.6021, "step": 8901 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.608174130170752e-06, "loss": 0.7623, "step": 8902 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.606181134280978e-06, "loss": 0.6335, "step": 8903 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.604188154057994e-06, "loss": 0.6, "step": 8904 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.60219518958108e-06, "loss": 0.6538, "step": 8905 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.600202240929525e-06, "loss": 0.5762, "step": 8906 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.598209308182613e-06, "loss": 0.6443, "step": 8907 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.596216391419619e-06, "loss": 0.7063, "step": 8908 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.594223490719829e-06, "loss": 0.7464, "step": 8909 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.592230606162523e-06, "loss": 0.6164, "step": 8910 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.590237737826983e-06, "loss": 0.6912, "step": 8911 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.588244885792488e-06, "loss": 0.7107, "step": 8912 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.586252050138315e-06, "loss": 0.7434, "step": 8913 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.584259230943752e-06, "loss": 0.5562, "step": 8914 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.58226642828806e-06, "loss": 0.7205, "step": 8915 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.580273642250528e-06, "loss": 0.623, "step": 8916 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.57828087291043e-06, "loss": 0.6963, "step": 8917 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.57628812034704e-06, "loss": 0.6368, "step": 8918 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.574295384639635e-06, "loss": 0.6321, "step": 8919 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.57230266586749e-06, "loss": 0.7202, "step": 8920 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.570309964109874e-06, "loss": 0.6614, "step": 8921 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.56831727944607e-06, "loss": 0.6743, "step": 8922 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.56632461195534e-06, "loss": 0.6332, "step": 8923 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.56433196171696e-06, "loss": 0.6742, "step": 8924 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.562339328810203e-06, "loss": 0.6319, "step": 8925 }, { "epoch": 1.61, "grad_norm": 0.0, "learning_rate": 9.560346713314332e-06, "loss": 0.5668, "step": 8926 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.558354115308627e-06, "loss": 0.5975, "step": 8927 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.55636153487235e-06, "loss": 0.6567, "step": 8928 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.554368972084774e-06, "loss": 0.6263, "step": 8929 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.552376427025161e-06, "loss": 0.7716, "step": 8930 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.55038389977278e-06, "loss": 0.6071, "step": 8931 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.5483913904069e-06, "loss": 0.6504, "step": 8932 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.546398899006782e-06, "loss": 0.5862, "step": 8933 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.54440642565169e-06, "loss": 0.7237, "step": 8934 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.542413970420897e-06, "loss": 0.5555, "step": 8935 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.540421533393657e-06, "loss": 0.6062, "step": 8936 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.53842911464924e-06, "loss": 0.7409, "step": 8937 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.536436714266898e-06, "loss": 0.5979, "step": 8938 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.534444332325901e-06, "loss": 0.6456, "step": 8939 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.532451968905503e-06, "loss": 0.6027, "step": 8940 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.530459624084967e-06, "loss": 0.627, "step": 8941 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.528467297943553e-06, "loss": 0.5939, "step": 8942 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.526474990560515e-06, "loss": 0.6861, "step": 8943 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.524482702015117e-06, "loss": 0.592, "step": 8944 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.522490432386611e-06, "loss": 0.6134, "step": 8945 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.520498181754254e-06, "loss": 0.6412, "step": 8946 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.5185059501973e-06, "loss": 0.6512, "step": 8947 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.516513737795001e-06, "loss": 0.658, "step": 8948 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.514521544626618e-06, "loss": 0.6646, "step": 8949 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.512529370771397e-06, "loss": 0.681, "step": 8950 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.510537216308592e-06, "loss": 0.5704, "step": 8951 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.508545081317462e-06, "loss": 0.6028, "step": 8952 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.506552965877243e-06, "loss": 0.5724, "step": 8953 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.504560870067194e-06, "loss": 0.6417, "step": 8954 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.502568793966564e-06, "loss": 0.6675, "step": 8955 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.500576737654596e-06, "loss": 0.711, "step": 8956 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.498584701210544e-06, "loss": 0.7107, "step": 8957 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.496592684713652e-06, "loss": 0.6608, "step": 8958 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.494600688243162e-06, "loss": 0.6384, "step": 8959 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.492608711878329e-06, "loss": 0.6686, "step": 8960 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.490616755698385e-06, "loss": 0.6281, "step": 8961 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.488624819782581e-06, "loss": 0.6381, "step": 8962 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.486632904210157e-06, "loss": 0.6966, "step": 8963 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.484641009060353e-06, "loss": 0.6342, "step": 8964 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.482649134412416e-06, "loss": 0.6545, "step": 8965 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.480657280345582e-06, "loss": 0.6473, "step": 8966 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.478665446939093e-06, "loss": 0.655, "step": 8967 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.476673634272186e-06, "loss": 0.6698, "step": 8968 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.474681842424093e-06, "loss": 0.8084, "step": 8969 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.47269007147406e-06, "loss": 0.6036, "step": 8970 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.470698321501318e-06, "loss": 0.6737, "step": 8971 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.468706592585103e-06, "loss": 0.5284, "step": 8972 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.46671488480465e-06, "loss": 0.5858, "step": 8973 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.464723198239193e-06, "loss": 0.6558, "step": 8974 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.462731532967969e-06, "loss": 0.6551, "step": 8975 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.4607398890702e-06, "loss": 0.6543, "step": 8976 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.45874826662512e-06, "loss": 0.6231, "step": 8977 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.456756665711964e-06, "loss": 0.6395, "step": 8978 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.45476508640996e-06, "loss": 0.5537, "step": 8979 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.452773528798332e-06, "loss": 0.5984, "step": 8980 }, { "epoch": 1.62, "grad_norm": 0.0, "learning_rate": 9.450781992956311e-06, "loss": 0.5823, "step": 8981 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.448790478963126e-06, "loss": 0.6547, "step": 8982 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.446798986897997e-06, "loss": 0.6703, "step": 8983 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.444807516840154e-06, "loss": 0.6211, "step": 8984 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.442816068868818e-06, "loss": 0.7506, "step": 8985 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.44082464306321e-06, "loss": 0.7895, "step": 8986 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.43883323950256e-06, "loss": 0.6188, "step": 8987 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.436841858266084e-06, "loss": 0.6634, "step": 8988 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.434850499433001e-06, "loss": 0.6108, "step": 8989 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.43285916308254e-06, "loss": 0.6501, "step": 8990 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.430867849293908e-06, "loss": 0.7115, "step": 8991 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.428876558146328e-06, "loss": 0.5381, "step": 8992 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.426885289719019e-06, "loss": 0.703, "step": 8993 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.424894044091191e-06, "loss": 0.5503, "step": 8994 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.422902821342067e-06, "loss": 0.6171, "step": 8995 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.420911621550856e-06, "loss": 0.7494, "step": 8996 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.41892044479677e-06, "loss": 0.641, "step": 8997 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.416929291159031e-06, "loss": 0.6209, "step": 8998 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.414938160716839e-06, "loss": 0.5848, "step": 8999 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.41294705354941e-06, "loss": 0.6796, "step": 9000 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.410955969735953e-06, "loss": 0.6143, "step": 9001 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.408964909355674e-06, "loss": 0.6652, "step": 9002 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.406973872487788e-06, "loss": 0.5955, "step": 9003 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.404982859211493e-06, "loss": 0.5838, "step": 9004 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.402991869606004e-06, "loss": 0.613, "step": 9005 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.40100090375052e-06, "loss": 0.5788, "step": 9006 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.399009961724241e-06, "loss": 0.5979, "step": 9007 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.39701904360638e-06, "loss": 0.6188, "step": 9008 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.395028149476134e-06, "loss": 0.5693, "step": 9009 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.3930372794127e-06, "loss": 0.644, "step": 9010 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.391046433495287e-06, "loss": 0.6605, "step": 9011 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.389055611803092e-06, "loss": 0.6343, "step": 9012 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.387064814415311e-06, "loss": 0.5688, "step": 9013 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.385074041411139e-06, "loss": 0.6102, "step": 9014 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.383083292869775e-06, "loss": 0.5818, "step": 9015 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.381092568870416e-06, "loss": 0.732, "step": 9016 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.379101869492254e-06, "loss": 0.71, "step": 9017 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.377111194814482e-06, "loss": 0.5844, "step": 9018 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.375120544916296e-06, "loss": 0.5674, "step": 9019 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.373129919876886e-06, "loss": 0.6381, "step": 9020 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.37113931977544e-06, "loss": 0.6499, "step": 9021 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.36914874469115e-06, "loss": 0.7116, "step": 9022 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.3671581947032e-06, "loss": 0.5822, "step": 9023 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.365167669890785e-06, "loss": 0.554, "step": 9024 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.363177170333087e-06, "loss": 0.599, "step": 9025 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.36118669610929e-06, "loss": 0.6543, "step": 9026 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.359196247298581e-06, "loss": 0.6432, "step": 9027 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.357205823980147e-06, "loss": 0.6549, "step": 9028 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.355215426233165e-06, "loss": 0.6414, "step": 9029 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.353225054136817e-06, "loss": 0.6394, "step": 9030 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.351234707770284e-06, "loss": 0.7919, "step": 9031 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.349244387212744e-06, "loss": 0.6153, "step": 9032 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.34725409254338e-06, "loss": 0.6273, "step": 9033 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.345263823841365e-06, "loss": 0.6254, "step": 9034 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.343273581185879e-06, "loss": 0.7364, "step": 9035 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.341283364656095e-06, "loss": 0.6255, "step": 9036 }, { "epoch": 1.63, "grad_norm": 0.0, "learning_rate": 9.339293174331183e-06, "loss": 0.6655, "step": 9037 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.337303010290321e-06, "loss": 0.6365, "step": 9038 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.335312872612683e-06, "loss": 0.6846, "step": 9039 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.333322761377434e-06, "loss": 0.7597, "step": 9040 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.331332676663751e-06, "loss": 0.5564, "step": 9041 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.3293426185508e-06, "loss": 0.5806, "step": 9042 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.32735258711775e-06, "loss": 0.6781, "step": 9043 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.325362582443763e-06, "loss": 0.6109, "step": 9044 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.323372604608008e-06, "loss": 0.7066, "step": 9045 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.321382653689652e-06, "loss": 0.622, "step": 9046 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.319392729767855e-06, "loss": 0.6885, "step": 9047 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.317402832921779e-06, "loss": 0.6168, "step": 9048 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.31541296323059e-06, "loss": 0.5677, "step": 9049 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.313423120773448e-06, "loss": 0.5778, "step": 9050 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.311433305629512e-06, "loss": 0.5215, "step": 9051 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.309443517877935e-06, "loss": 0.6537, "step": 9052 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.307453757597876e-06, "loss": 0.5375, "step": 9053 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.305464024868495e-06, "loss": 0.5712, "step": 9054 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.303474319768946e-06, "loss": 0.689, "step": 9055 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.30148464237838e-06, "loss": 0.5509, "step": 9056 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.299494992775952e-06, "loss": 0.7237, "step": 9057 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.297505371040816e-06, "loss": 0.5964, "step": 9058 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.295515777252119e-06, "loss": 0.6638, "step": 9059 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.293526211489009e-06, "loss": 0.6395, "step": 9060 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.291536673830636e-06, "loss": 0.5614, "step": 9061 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.28954716435615e-06, "loss": 0.5917, "step": 9062 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.287557683144694e-06, "loss": 0.6299, "step": 9063 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.285568230275411e-06, "loss": 0.6087, "step": 9064 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.28357880582745e-06, "loss": 0.4884, "step": 9065 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.281589409879956e-06, "loss": 0.5642, "step": 9066 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.279600042512062e-06, "loss": 0.6431, "step": 9067 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.277610703802911e-06, "loss": 0.7652, "step": 9068 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.275621393831644e-06, "loss": 0.5578, "step": 9069 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.273632112677401e-06, "loss": 0.7175, "step": 9070 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.271642860419315e-06, "loss": 0.6845, "step": 9071 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.269653637136525e-06, "loss": 0.615, "step": 9072 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.267664442908166e-06, "loss": 0.5339, "step": 9073 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.265675277813368e-06, "loss": 0.6754, "step": 9074 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.263686141931266e-06, "loss": 0.6955, "step": 9075 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.26169703534099e-06, "loss": 0.6504, "step": 9076 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.25970795812167e-06, "loss": 0.6631, "step": 9077 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.257718910352435e-06, "loss": 0.6417, "step": 9078 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.255729892112415e-06, "loss": 0.633, "step": 9079 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.253740903480734e-06, "loss": 0.6415, "step": 9080 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.251751944536522e-06, "loss": 0.5022, "step": 9081 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.249763015358897e-06, "loss": 0.6614, "step": 9082 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.247774116026982e-06, "loss": 0.5272, "step": 9083 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.245785246619903e-06, "loss": 0.6466, "step": 9084 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.24379640721678e-06, "loss": 0.611, "step": 9085 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.241807597896728e-06, "loss": 0.6603, "step": 9086 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.239818818738872e-06, "loss": 0.6029, "step": 9087 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.237830069822327e-06, "loss": 0.6361, "step": 9088 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.235841351226206e-06, "loss": 0.6148, "step": 9089 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.233852663029624e-06, "loss": 0.6589, "step": 9090 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.231864005311695e-06, "loss": 0.6177, "step": 9091 }, { "epoch": 1.64, "grad_norm": 0.0, "learning_rate": 9.229875378151533e-06, "loss": 0.6165, "step": 9092 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.227886781628246e-06, "loss": 0.5817, "step": 9093 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.225898215820946e-06, "loss": 0.567, "step": 9094 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.223909680808743e-06, "loss": 0.5832, "step": 9095 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.221921176670744e-06, "loss": 0.5555, "step": 9096 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.21993270348605e-06, "loss": 0.7314, "step": 9097 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.217944261333771e-06, "loss": 0.5594, "step": 9098 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.215955850293007e-06, "loss": 0.7542, "step": 9099 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.213967470442865e-06, "loss": 0.5775, "step": 9100 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.211979121862442e-06, "loss": 0.6416, "step": 9101 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.209990804630838e-06, "loss": 0.674, "step": 9102 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.208002518827155e-06, "loss": 0.6626, "step": 9103 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.206014264530491e-06, "loss": 0.6844, "step": 9104 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.204026041819937e-06, "loss": 0.7165, "step": 9105 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.202037850774592e-06, "loss": 0.6816, "step": 9106 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.200049691473545e-06, "loss": 0.6406, "step": 9107 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.198061563995894e-06, "loss": 0.5687, "step": 9108 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.196073468420729e-06, "loss": 0.6609, "step": 9109 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.194085404827135e-06, "loss": 0.6161, "step": 9110 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.192097373294211e-06, "loss": 0.5762, "step": 9111 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.19010937390103e-06, "loss": 0.5965, "step": 9112 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.18812140672669e-06, "loss": 0.6013, "step": 9113 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.186133471850271e-06, "loss": 0.651, "step": 9114 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.184145569350853e-06, "loss": 0.5071, "step": 9115 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.182157699307525e-06, "loss": 0.603, "step": 9116 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.180169861799363e-06, "loss": 0.6021, "step": 9117 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.178182056905451e-06, "loss": 0.6829, "step": 9118 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.176194284704864e-06, "loss": 0.6583, "step": 9119 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.174206545276678e-06, "loss": 0.6879, "step": 9120 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.172218838699972e-06, "loss": 0.6669, "step": 9121 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.170231165053818e-06, "loss": 0.6223, "step": 9122 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.16824352441729e-06, "loss": 0.6252, "step": 9123 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.166255916869456e-06, "loss": 0.6822, "step": 9124 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.164268342489394e-06, "loss": 0.6368, "step": 9125 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.16228080135617e-06, "loss": 0.5766, "step": 9126 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.160293293548848e-06, "loss": 0.6203, "step": 9127 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.158305819146499e-06, "loss": 0.5831, "step": 9128 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.156318378228183e-06, "loss": 0.682, "step": 9129 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.15433097087297e-06, "loss": 0.6779, "step": 9130 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.15234359715992e-06, "loss": 0.6169, "step": 9131 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.150356257168091e-06, "loss": 0.6108, "step": 9132 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.148368950976549e-06, "loss": 0.5289, "step": 9133 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.146381678664348e-06, "loss": 0.7371, "step": 9134 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.144394440310547e-06, "loss": 0.5971, "step": 9135 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.1424072359942e-06, "loss": 0.6413, "step": 9136 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.140420065794361e-06, "loss": 0.6319, "step": 9137 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.138432929790085e-06, "loss": 0.6618, "step": 9138 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.136445828060423e-06, "loss": 0.5747, "step": 9139 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.134458760684425e-06, "loss": 0.662, "step": 9140 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.132471727741145e-06, "loss": 0.6607, "step": 9141 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.130484729309621e-06, "loss": 0.6007, "step": 9142 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.128497765468905e-06, "loss": 0.6315, "step": 9143 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.12651083629804e-06, "loss": 0.6426, "step": 9144 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.12452394187607e-06, "loss": 0.592, "step": 9145 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.122537082282038e-06, "loss": 0.8017, "step": 9146 }, { "epoch": 1.65, "grad_norm": 0.0, "learning_rate": 9.120550257594985e-06, "loss": 0.5979, "step": 9147 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.118563467893948e-06, "loss": 0.5361, "step": 9148 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.11657671325797e-06, "loss": 0.524, "step": 9149 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.11458999376608e-06, "loss": 0.6776, "step": 9150 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.112603309497317e-06, "loss": 0.6238, "step": 9151 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.110616660530715e-06, "loss": 0.7124, "step": 9152 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.108630046945305e-06, "loss": 0.6198, "step": 9153 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.10664346882012e-06, "loss": 0.6614, "step": 9154 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.104656926234188e-06, "loss": 0.6941, "step": 9155 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.102670419266535e-06, "loss": 0.5805, "step": 9156 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.100683947996197e-06, "loss": 0.6183, "step": 9157 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.098697512502186e-06, "loss": 0.6649, "step": 9158 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.096711112863532e-06, "loss": 0.6711, "step": 9159 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.09472474915926e-06, "loss": 0.5895, "step": 9160 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.092738421468384e-06, "loss": 0.6524, "step": 9161 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.090752129869932e-06, "loss": 0.6134, "step": 9162 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.088765874442916e-06, "loss": 0.5874, "step": 9163 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.086779655266358e-06, "loss": 0.5596, "step": 9164 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.084793472419268e-06, "loss": 0.6702, "step": 9165 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.082807325980658e-06, "loss": 0.6121, "step": 9166 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.080821216029546e-06, "loss": 0.6624, "step": 9167 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.078835142644942e-06, "loss": 0.6098, "step": 9168 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.076849105905852e-06, "loss": 0.6457, "step": 9169 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.074863105891286e-06, "loss": 0.5721, "step": 9170 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.072877142680252e-06, "loss": 0.6259, "step": 9171 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.070891216351754e-06, "loss": 0.6947, "step": 9172 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.068905326984797e-06, "loss": 0.6595, "step": 9173 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.066919474658376e-06, "loss": 0.6173, "step": 9174 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.064933659451498e-06, "loss": 0.6369, "step": 9175 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.062947881443162e-06, "loss": 0.6118, "step": 9176 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.060962140712365e-06, "loss": 0.6028, "step": 9177 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.0589764373381e-06, "loss": 0.6137, "step": 9178 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.05699077139937e-06, "loss": 0.5609, "step": 9179 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.055005142975158e-06, "loss": 0.569, "step": 9180 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.053019552144462e-06, "loss": 0.6919, "step": 9181 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.05103399898627e-06, "loss": 0.6204, "step": 9182 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.049048483579569e-06, "loss": 0.7895, "step": 9183 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.04706300600335e-06, "loss": 0.6335, "step": 9184 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.0450775663366e-06, "loss": 0.6043, "step": 9185 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.043092164658293e-06, "loss": 0.6063, "step": 9186 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.041106801047429e-06, "loss": 0.5927, "step": 9187 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.039121475582973e-06, "loss": 0.5993, "step": 9188 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.03713618834391e-06, "loss": 0.6316, "step": 9189 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.035150939409221e-06, "loss": 0.6016, "step": 9190 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.03316572885788e-06, "loss": 0.7607, "step": 9191 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.031180556768862e-06, "loss": 0.6369, "step": 9192 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.029195423221142e-06, "loss": 0.6741, "step": 9193 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.027210328293693e-06, "loss": 0.7003, "step": 9194 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.025225272065481e-06, "loss": 0.6984, "step": 9195 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.023240254615478e-06, "loss": 0.6123, "step": 9196 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.021255276022652e-06, "loss": 0.6031, "step": 9197 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.019270336365967e-06, "loss": 0.637, "step": 9198 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.017285435724385e-06, "loss": 0.6963, "step": 9199 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.015300574176875e-06, "loss": 0.5919, "step": 9200 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.013315751802395e-06, "loss": 0.6018, "step": 9201 }, { "epoch": 1.66, "grad_norm": 0.0, "learning_rate": 9.011330968679906e-06, "loss": 0.6422, "step": 9202 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 9.009346224888365e-06, "loss": 0.5764, "step": 9203 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 9.007361520506724e-06, "loss": 0.5982, "step": 9204 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 9.005376855613946e-06, "loss": 0.5929, "step": 9205 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 9.003392230288979e-06, "loss": 0.6633, "step": 9206 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 9.001407644610774e-06, "loss": 0.6489, "step": 9207 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.999423098658286e-06, "loss": 0.833, "step": 9208 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.997438592510462e-06, "loss": 0.6974, "step": 9209 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.99545412624625e-06, "loss": 0.6237, "step": 9210 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.993469699944593e-06, "loss": 0.5658, "step": 9211 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.991485313684431e-06, "loss": 0.7165, "step": 9212 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.989500967544715e-06, "loss": 0.6304, "step": 9213 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.987516661604381e-06, "loss": 0.5263, "step": 9214 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.98553239594237e-06, "loss": 0.5527, "step": 9215 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.983548170637612e-06, "loss": 0.7167, "step": 9216 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.981563985769057e-06, "loss": 0.6179, "step": 9217 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.979579841415628e-06, "loss": 0.6583, "step": 9218 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.977595737656262e-06, "loss": 0.6484, "step": 9219 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.975611674569888e-06, "loss": 0.5791, "step": 9220 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.973627652235434e-06, "loss": 0.6187, "step": 9221 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.971643670731831e-06, "loss": 0.6624, "step": 9222 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.969659730138007e-06, "loss": 0.6663, "step": 9223 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.967675830532881e-06, "loss": 0.6679, "step": 9224 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.965691971995386e-06, "loss": 0.6066, "step": 9225 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.963708154604428e-06, "loss": 0.5468, "step": 9226 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.96172437843894e-06, "loss": 0.6508, "step": 9227 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.959740643577833e-06, "loss": 0.6233, "step": 9228 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.957756950100024e-06, "loss": 0.5507, "step": 9229 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.955773298084429e-06, "loss": 0.6286, "step": 9230 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.953789687609963e-06, "loss": 0.6119, "step": 9231 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.951806118755536e-06, "loss": 0.5794, "step": 9232 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.949822591600057e-06, "loss": 0.6741, "step": 9233 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.947839106222432e-06, "loss": 0.6055, "step": 9234 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.945855662701572e-06, "loss": 0.6732, "step": 9235 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.943872261116379e-06, "loss": 0.649, "step": 9236 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.941888901545754e-06, "loss": 0.5894, "step": 9237 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.939905584068605e-06, "loss": 0.7955, "step": 9238 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.937922308763826e-06, "loss": 0.5139, "step": 9239 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.935939075710319e-06, "loss": 0.6673, "step": 9240 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.933955884986977e-06, "loss": 0.6447, "step": 9241 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.931972736672694e-06, "loss": 0.6718, "step": 9242 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.929989630846369e-06, "loss": 0.6874, "step": 9243 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.928006567586886e-06, "loss": 0.6513, "step": 9244 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.926023546973139e-06, "loss": 0.5964, "step": 9245 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.924040569084016e-06, "loss": 0.6736, "step": 9246 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.922057633998406e-06, "loss": 0.6111, "step": 9247 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.920074741795185e-06, "loss": 0.6501, "step": 9248 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.918091892553244e-06, "loss": 0.5854, "step": 9249 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.916109086351457e-06, "loss": 0.5947, "step": 9250 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.914126323268709e-06, "loss": 0.6273, "step": 9251 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.912143603383876e-06, "loss": 0.489, "step": 9252 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.910160926775833e-06, "loss": 0.6332, "step": 9253 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.908178293523458e-06, "loss": 0.6976, "step": 9254 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.90619570370562e-06, "loss": 0.641, "step": 9255 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.90421315740119e-06, "loss": 0.649, "step": 9256 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.90223065468904e-06, "loss": 0.6302, "step": 9257 }, { "epoch": 1.67, "grad_norm": 0.0, "learning_rate": 8.90024819564803e-06, "loss": 0.6173, "step": 9258 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.898265780357036e-06, "loss": 0.7211, "step": 9259 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.896283408894915e-06, "loss": 0.6704, "step": 9260 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.89430108134053e-06, "loss": 0.752, "step": 9261 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.89231879777274e-06, "loss": 0.5364, "step": 9262 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.890336558270412e-06, "loss": 0.6138, "step": 9263 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.888354362912393e-06, "loss": 0.6766, "step": 9264 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.886372211777543e-06, "loss": 0.562, "step": 9265 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.884390104944714e-06, "loss": 0.6168, "step": 9266 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.882408042492755e-06, "loss": 0.6192, "step": 9267 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.88042602450052e-06, "loss": 0.6528, "step": 9268 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.878444051046856e-06, "loss": 0.5084, "step": 9269 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.876462122210611e-06, "loss": 0.6621, "step": 9270 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.874480238070626e-06, "loss": 0.6479, "step": 9271 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.872498398705743e-06, "loss": 0.6359, "step": 9272 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.870516604194808e-06, "loss": 0.6189, "step": 9273 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.868534854616656e-06, "loss": 0.649, "step": 9274 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.866553150050125e-06, "loss": 0.6915, "step": 9275 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.864571490574053e-06, "loss": 0.6788, "step": 9276 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.862589876267272e-06, "loss": 0.6088, "step": 9277 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.860608307208618e-06, "loss": 0.6692, "step": 9278 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.858626783476912e-06, "loss": 0.6412, "step": 9279 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.856645305150987e-06, "loss": 0.5454, "step": 9280 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.854663872309673e-06, "loss": 0.5438, "step": 9281 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.852682485031792e-06, "loss": 0.7112, "step": 9282 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.850701143396167e-06, "loss": 0.8257, "step": 9283 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.84871984748162e-06, "loss": 0.5538, "step": 9284 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.84673859736697e-06, "loss": 0.5972, "step": 9285 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.844757393131035e-06, "loss": 0.7439, "step": 9286 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.842776234852628e-06, "loss": 0.6227, "step": 9287 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.840795122610563e-06, "loss": 0.6244, "step": 9288 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.838814056483658e-06, "loss": 0.6511, "step": 9289 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.836833036550717e-06, "loss": 0.6931, "step": 9290 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.834852062890549e-06, "loss": 0.5282, "step": 9291 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.832871135581962e-06, "loss": 0.5332, "step": 9292 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.830890254703766e-06, "loss": 0.5372, "step": 9293 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.828909420334754e-06, "loss": 0.6863, "step": 9294 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.826928632553732e-06, "loss": 0.5738, "step": 9295 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.824947891439498e-06, "loss": 0.6664, "step": 9296 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.822967197070851e-06, "loss": 0.6479, "step": 9297 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.820986549526584e-06, "loss": 0.6781, "step": 9298 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.819005948885491e-06, "loss": 0.6316, "step": 9299 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.817025395226372e-06, "loss": 0.6634, "step": 9300 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.815044888628001e-06, "loss": 0.714, "step": 9301 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.813064429169177e-06, "loss": 0.5851, "step": 9302 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.811084016928684e-06, "loss": 0.6164, "step": 9303 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.809103651985302e-06, "loss": 0.6251, "step": 9304 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.80712333441782e-06, "loss": 0.5424, "step": 9305 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.805143064305016e-06, "loss": 0.6742, "step": 9306 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.803162841725666e-06, "loss": 0.6035, "step": 9307 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.801182666758551e-06, "loss": 0.5966, "step": 9308 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.799202539482439e-06, "loss": 0.6773, "step": 9309 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.797222459976109e-06, "loss": 0.6009, "step": 9310 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.79524242831833e-06, "loss": 0.7276, "step": 9311 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.793262444587874e-06, "loss": 0.6508, "step": 9312 }, { "epoch": 1.68, "grad_norm": 0.0, "learning_rate": 8.791282508863499e-06, "loss": 0.6494, "step": 9313 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.78930262122398e-06, "loss": 0.6127, "step": 9314 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.787322781748078e-06, "loss": 0.7387, "step": 9315 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.785342990514554e-06, "loss": 0.6275, "step": 9316 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.783363247602164e-06, "loss": 0.5928, "step": 9317 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.781383553089667e-06, "loss": 0.6269, "step": 9318 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.779403907055822e-06, "loss": 0.5527, "step": 9319 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.77742430957938e-06, "loss": 0.5877, "step": 9320 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.775444760739092e-06, "loss": 0.5559, "step": 9321 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.773465260613712e-06, "loss": 0.6756, "step": 9322 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.771485809281986e-06, "loss": 0.6359, "step": 9323 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.769506406822658e-06, "loss": 0.5389, "step": 9324 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.767527053314472e-06, "loss": 0.6821, "step": 9325 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.76554774883617e-06, "loss": 0.5494, "step": 9326 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.763568493466495e-06, "loss": 0.7378, "step": 9327 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.761589287284183e-06, "loss": 0.624, "step": 9328 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.759610130367968e-06, "loss": 0.5561, "step": 9329 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.75763102279659e-06, "loss": 0.6964, "step": 9330 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.75565196464878e-06, "loss": 0.6108, "step": 9331 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.753672956003264e-06, "loss": 0.6193, "step": 9332 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.751693996938774e-06, "loss": 0.583, "step": 9333 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.749715087534033e-06, "loss": 0.6135, "step": 9334 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.74773622786777e-06, "loss": 0.615, "step": 9335 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.745757418018704e-06, "loss": 0.7108, "step": 9336 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.743778658065554e-06, "loss": 0.5639, "step": 9337 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.741799948087048e-06, "loss": 0.6773, "step": 9338 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.739821288161889e-06, "loss": 0.5071, "step": 9339 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.737842678368802e-06, "loss": 0.6975, "step": 9340 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.735864118786493e-06, "loss": 0.6131, "step": 9341 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.733885609493674e-06, "loss": 0.603, "step": 9342 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.731907150569057e-06, "loss": 0.6752, "step": 9343 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.729928742091344e-06, "loss": 0.6392, "step": 9344 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.727950384139242e-06, "loss": 0.587, "step": 9345 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.725972076791457e-06, "loss": 0.6245, "step": 9346 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.723993820126679e-06, "loss": 0.7299, "step": 9347 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.722015614223616e-06, "loss": 0.5512, "step": 9348 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.72003745916096e-06, "loss": 0.6858, "step": 9349 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.718059355017405e-06, "loss": 0.6629, "step": 9350 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.716081301871649e-06, "loss": 0.6592, "step": 9351 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.714103299802375e-06, "loss": 0.543, "step": 9352 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.71212534888828e-06, "loss": 0.6635, "step": 9353 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.71014744920804e-06, "loss": 0.6727, "step": 9354 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.708169600840342e-06, "loss": 0.5905, "step": 9355 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.706191803863874e-06, "loss": 0.6764, "step": 9356 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.704214058357313e-06, "loss": 0.5709, "step": 9357 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.702236364399335e-06, "loss": 0.6495, "step": 9358 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.700258722068615e-06, "loss": 0.5902, "step": 9359 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.698281131443834e-06, "loss": 0.672, "step": 9360 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.696303592603659e-06, "loss": 0.626, "step": 9361 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.69432610562676e-06, "loss": 0.6514, "step": 9362 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.692348670591804e-06, "loss": 0.6662, "step": 9363 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.690371287577457e-06, "loss": 0.5972, "step": 9364 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.688393956662386e-06, "loss": 0.5716, "step": 9365 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.68641667792525e-06, "loss": 0.6627, "step": 9366 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.684439451444706e-06, "loss": 0.6177, "step": 9367 }, { "epoch": 1.69, "grad_norm": 0.0, "learning_rate": 8.682462277299418e-06, "loss": 0.6885, "step": 9368 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.68048515556804e-06, "loss": 0.5627, "step": 9369 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.678508086329219e-06, "loss": 0.6903, "step": 9370 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.676531069661611e-06, "loss": 0.6167, "step": 9371 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.674554105643863e-06, "loss": 0.5733, "step": 9372 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.672577194354623e-06, "loss": 0.6709, "step": 9373 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.670600335872538e-06, "loss": 0.558, "step": 9374 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.668623530276245e-06, "loss": 0.7124, "step": 9375 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.666646777644398e-06, "loss": 0.6862, "step": 9376 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.664670078055619e-06, "loss": 0.6496, "step": 9377 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.662693431588554e-06, "loss": 0.6069, "step": 9378 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.660716838321834e-06, "loss": 0.5178, "step": 9379 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.658740298334091e-06, "loss": 0.6735, "step": 9380 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.656763811703959e-06, "loss": 0.5817, "step": 9381 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.654787378510064e-06, "loss": 0.671, "step": 9382 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.65281099883103e-06, "loss": 0.6571, "step": 9383 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.650834672745488e-06, "loss": 0.5733, "step": 9384 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.648858400332046e-06, "loss": 0.6246, "step": 9385 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.646882181669335e-06, "loss": 0.6368, "step": 9386 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.64490601683597e-06, "loss": 0.6595, "step": 9387 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.64292990591056e-06, "loss": 0.6555, "step": 9388 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.640953848971727e-06, "loss": 0.595, "step": 9389 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.638977846098079e-06, "loss": 0.6077, "step": 9390 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.637001897368224e-06, "loss": 0.5972, "step": 9391 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.635026002860768e-06, "loss": 0.5532, "step": 9392 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.633050162654312e-06, "loss": 0.5617, "step": 9393 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.631074376827467e-06, "loss": 0.5767, "step": 9394 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.629098645458826e-06, "loss": 0.5976, "step": 9395 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.627122968626991e-06, "loss": 0.6849, "step": 9396 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.625147346410555e-06, "loss": 0.6447, "step": 9397 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.623171778888111e-06, "loss": 0.615, "step": 9398 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.621196266138257e-06, "loss": 0.547, "step": 9399 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.619220808239575e-06, "loss": 0.7119, "step": 9400 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.617245405270656e-06, "loss": 0.4856, "step": 9401 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.61527005731008e-06, "loss": 0.7315, "step": 9402 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.613294764436435e-06, "loss": 0.7368, "step": 9403 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.611319526728301e-06, "loss": 0.5647, "step": 9404 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.609344344264252e-06, "loss": 0.6962, "step": 9405 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.607369217122874e-06, "loss": 0.5421, "step": 9406 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.605394145382729e-06, "loss": 0.7157, "step": 9407 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.603419129122395e-06, "loss": 0.7458, "step": 9408 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.60144416842044e-06, "loss": 0.6527, "step": 9409 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.599469263355432e-06, "loss": 0.6681, "step": 9410 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.597494414005934e-06, "loss": 0.6917, "step": 9411 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.595519620450514e-06, "loss": 0.5285, "step": 9412 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.593544882767728e-06, "loss": 0.6083, "step": 9413 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.59157020103614e-06, "loss": 0.5268, "step": 9414 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.589595575334298e-06, "loss": 0.5531, "step": 9415 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.58762100574076e-06, "loss": 0.6528, "step": 9416 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.58564649233408e-06, "loss": 0.5361, "step": 9417 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.583672035192802e-06, "loss": 0.5556, "step": 9418 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.581697634395478e-06, "loss": 0.5089, "step": 9419 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.579723290020654e-06, "loss": 0.5621, "step": 9420 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.577749002146868e-06, "loss": 0.5907, "step": 9421 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.575774770852668e-06, "loss": 0.6491, "step": 9422 }, { "epoch": 1.7, "grad_norm": 0.0, "learning_rate": 8.573800596216582e-06, "loss": 0.6297, "step": 9423 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.571826478317155e-06, "loss": 0.6074, "step": 9424 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.569852417232915e-06, "loss": 0.6901, "step": 9425 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.567878413042396e-06, "loss": 0.5677, "step": 9426 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.565904465824126e-06, "loss": 0.6066, "step": 9427 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.563930575656637e-06, "loss": 0.632, "step": 9428 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.561956742618449e-06, "loss": 0.5825, "step": 9429 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.559982966788083e-06, "loss": 0.5447, "step": 9430 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.558009248244062e-06, "loss": 0.7316, "step": 9431 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.556035587064903e-06, "loss": 0.6504, "step": 9432 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.554061983329124e-06, "loss": 0.722, "step": 9433 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.552088437115233e-06, "loss": 0.6098, "step": 9434 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.550114948501746e-06, "loss": 0.5954, "step": 9435 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.54814151756717e-06, "loss": 0.6559, "step": 9436 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.546168144390016e-06, "loss": 0.639, "step": 9437 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.54419482904878e-06, "loss": 0.5734, "step": 9438 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.542221571621967e-06, "loss": 0.6271, "step": 9439 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.540248372188077e-06, "loss": 0.636, "step": 9440 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.538275230825608e-06, "loss": 0.6641, "step": 9441 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.536302147613056e-06, "loss": 0.6452, "step": 9442 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.534329122628909e-06, "loss": 0.5799, "step": 9443 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.532356155951664e-06, "loss": 0.7555, "step": 9444 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.530383247659802e-06, "loss": 0.622, "step": 9445 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.528410397831814e-06, "loss": 0.5026, "step": 9446 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.52643760654618e-06, "loss": 0.698, "step": 9447 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.524464873881382e-06, "loss": 0.6033, "step": 9448 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.522492199915901e-06, "loss": 0.6634, "step": 9449 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.52051958472821e-06, "loss": 0.6886, "step": 9450 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.518547028396783e-06, "loss": 0.5838, "step": 9451 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.516574531000099e-06, "loss": 0.6779, "step": 9452 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.514602092616616e-06, "loss": 0.6252, "step": 9453 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.512629713324808e-06, "loss": 0.6609, "step": 9454 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.510657393203138e-06, "loss": 0.5022, "step": 9455 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.508685132330066e-06, "loss": 0.6336, "step": 9456 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.506712930784057e-06, "loss": 0.6234, "step": 9457 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.504740788643566e-06, "loss": 0.5692, "step": 9458 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.502768705987047e-06, "loss": 0.595, "step": 9459 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.500796682892955e-06, "loss": 0.6257, "step": 9460 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.498824719439737e-06, "loss": 0.7055, "step": 9461 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.496852815705844e-06, "loss": 0.6277, "step": 9462 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.49488097176972e-06, "loss": 0.6637, "step": 9463 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.49290918770981e-06, "loss": 0.6491, "step": 9464 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.490937463604556e-06, "loss": 0.6483, "step": 9465 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.488965799532394e-06, "loss": 0.6634, "step": 9466 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.486994195571763e-06, "loss": 0.5554, "step": 9467 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.485022651801092e-06, "loss": 0.5662, "step": 9468 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.483051168298815e-06, "loss": 0.6633, "step": 9469 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.481079745143361e-06, "loss": 0.5911, "step": 9470 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.47910838241316e-06, "loss": 0.6766, "step": 9471 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.477137080186629e-06, "loss": 0.6824, "step": 9472 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.475165838542195e-06, "loss": 0.5567, "step": 9473 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.473194657558276e-06, "loss": 0.5524, "step": 9474 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.471223537313294e-06, "loss": 0.6475, "step": 9475 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.469252477885654e-06, "loss": 0.6151, "step": 9476 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.46728147935377e-06, "loss": 0.596, "step": 9477 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.465310541796059e-06, "loss": 0.7419, "step": 9478 }, { "epoch": 1.71, "grad_norm": 0.0, "learning_rate": 8.463339665290923e-06, "loss": 0.6077, "step": 9479 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.461368849916765e-06, "loss": 0.5885, "step": 9480 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.459398095751991e-06, "loss": 0.5798, "step": 9481 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.457427402875004e-06, "loss": 0.6657, "step": 9482 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.455456771364194e-06, "loss": 0.6252, "step": 9483 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.45348620129796e-06, "loss": 0.6597, "step": 9484 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.451515692754693e-06, "loss": 0.5826, "step": 9485 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.449545245812786e-06, "loss": 0.6441, "step": 9486 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.447574860550625e-06, "loss": 0.5609, "step": 9487 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.445604537046595e-06, "loss": 0.6354, "step": 9488 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.443634275379078e-06, "loss": 0.6494, "step": 9489 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.44166407562646e-06, "loss": 0.5979, "step": 9490 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.439693937867111e-06, "loss": 0.6439, "step": 9491 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.437723862179413e-06, "loss": 0.6002, "step": 9492 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.435753848641735e-06, "loss": 0.6368, "step": 9493 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.433783897332444e-06, "loss": 0.6794, "step": 9494 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.431814008329918e-06, "loss": 0.7195, "step": 9495 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.429844181712515e-06, "loss": 0.5524, "step": 9496 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.427874417558603e-06, "loss": 0.6442, "step": 9497 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.42590471594654e-06, "loss": 0.6596, "step": 9498 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.42393507695468e-06, "loss": 0.7078, "step": 9499 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.421965500661383e-06, "loss": 0.5622, "step": 9500 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.419995987145003e-06, "loss": 0.5279, "step": 9501 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.418026536483888e-06, "loss": 0.5759, "step": 9502 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.416057148756388e-06, "loss": 0.6721, "step": 9503 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.41408782404085e-06, "loss": 0.6368, "step": 9504 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.412118562415616e-06, "loss": 0.6468, "step": 9505 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.410149363959024e-06, "loss": 0.6591, "step": 9506 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.408180228749413e-06, "loss": 0.6561, "step": 9507 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.40621115686512e-06, "loss": 0.6433, "step": 9508 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.404242148384478e-06, "loss": 0.6429, "step": 9509 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.402273203385815e-06, "loss": 0.726, "step": 9510 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.400304321947463e-06, "loss": 0.5422, "step": 9511 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.398335504147747e-06, "loss": 0.6503, "step": 9512 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.396366750064987e-06, "loss": 0.7107, "step": 9513 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.394398059777505e-06, "loss": 0.6615, "step": 9514 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.392429433363615e-06, "loss": 0.6641, "step": 9515 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.39046087090164e-06, "loss": 0.6891, "step": 9516 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.38849237246989e-06, "loss": 0.6053, "step": 9517 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.386523938146668e-06, "loss": 0.6421, "step": 9518 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.384555568010294e-06, "loss": 0.5133, "step": 9519 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.382587262139066e-06, "loss": 0.6931, "step": 9520 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.380619020611287e-06, "loss": 0.7097, "step": 9521 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.378650843505258e-06, "loss": 0.5987, "step": 9522 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.376682730899273e-06, "loss": 0.6403, "step": 9523 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.374714682871633e-06, "loss": 0.6259, "step": 9524 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.372746699500626e-06, "loss": 0.6752, "step": 9525 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.37077878086454e-06, "loss": 0.6135, "step": 9526 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.368810927041668e-06, "loss": 0.5707, "step": 9527 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.366843138110294e-06, "loss": 0.7357, "step": 9528 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.364875414148696e-06, "loss": 0.6439, "step": 9529 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.362907755235155e-06, "loss": 0.6327, "step": 9530 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.360940161447945e-06, "loss": 0.6981, "step": 9531 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.358972632865348e-06, "loss": 0.6445, "step": 9532 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.357005169565628e-06, "loss": 0.6063, "step": 9533 }, { "epoch": 1.72, "grad_norm": 0.0, "learning_rate": 8.355037771627059e-06, "loss": 0.5533, "step": 9534 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.353070439127907e-06, "loss": 0.6686, "step": 9535 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.351103172146428e-06, "loss": 0.5937, "step": 9536 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.349135970760893e-06, "loss": 0.7251, "step": 9537 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.347168835049557e-06, "loss": 0.7616, "step": 9538 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.345201765090677e-06, "loss": 0.6321, "step": 9539 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.343234760962504e-06, "loss": 0.636, "step": 9540 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.34126782274329e-06, "loss": 0.5476, "step": 9541 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.339300950511286e-06, "loss": 0.6097, "step": 9542 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.337334144344736e-06, "loss": 0.7044, "step": 9543 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.33536740432188e-06, "loss": 0.5913, "step": 9544 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.333400730520959e-06, "loss": 0.6064, "step": 9545 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.331434123020213e-06, "loss": 0.5913, "step": 9546 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.329467581897876e-06, "loss": 0.5891, "step": 9547 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.32750110723218e-06, "loss": 0.5752, "step": 9548 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.325534699101357e-06, "loss": 0.6136, "step": 9549 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.323568357583632e-06, "loss": 0.5916, "step": 9550 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.32160208275723e-06, "loss": 0.6869, "step": 9551 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.31963587470037e-06, "loss": 0.5737, "step": 9552 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.317669733491274e-06, "loss": 0.5651, "step": 9553 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.315703659208157e-06, "loss": 0.4911, "step": 9554 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.313737651929238e-06, "loss": 0.6006, "step": 9555 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.311771711732718e-06, "loss": 0.6093, "step": 9556 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.309805838696815e-06, "loss": 0.6464, "step": 9557 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.307840032899733e-06, "loss": 0.7196, "step": 9558 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.30587429441967e-06, "loss": 0.5897, "step": 9559 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.303908623334833e-06, "loss": 0.6006, "step": 9560 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.301943019723413e-06, "loss": 0.6485, "step": 9561 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.29997748366361e-06, "loss": 0.535, "step": 9562 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.298012015233615e-06, "loss": 0.6791, "step": 9563 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.296046614511616e-06, "loss": 0.6537, "step": 9564 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.294081281575807e-06, "loss": 0.6816, "step": 9565 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.292116016504363e-06, "loss": 0.6743, "step": 9566 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.29015081937547e-06, "loss": 0.69, "step": 9567 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.288185690267307e-06, "loss": 0.6178, "step": 9568 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.286220629258047e-06, "loss": 0.5702, "step": 9569 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.28425563642587e-06, "loss": 0.7371, "step": 9570 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.282290711848941e-06, "loss": 0.7484, "step": 9571 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.280325855605428e-06, "loss": 0.5873, "step": 9572 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.278361067773507e-06, "loss": 0.6372, "step": 9573 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.276396348431324e-06, "loss": 0.6051, "step": 9574 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.27443169765705e-06, "loss": 0.5829, "step": 9575 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.272467115528838e-06, "loss": 0.603, "step": 9576 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.270502602124843e-06, "loss": 0.6389, "step": 9577 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.268538157523218e-06, "loss": 0.6591, "step": 9578 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.266573781802113e-06, "loss": 0.6777, "step": 9579 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.26460947503967e-06, "loss": 0.6859, "step": 9580 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.262645237314038e-06, "loss": 0.6837, "step": 9581 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.26068106870335e-06, "loss": 0.6635, "step": 9582 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.258716969285753e-06, "loss": 0.6071, "step": 9583 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.256752939139377e-06, "loss": 0.6319, "step": 9584 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.254788978342354e-06, "loss": 0.5514, "step": 9585 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.252825086972815e-06, "loss": 0.6564, "step": 9586 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.250861265108887e-06, "loss": 0.6774, "step": 9587 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.248897512828698e-06, "loss": 0.5766, "step": 9588 }, { "epoch": 1.73, "grad_norm": 0.0, "learning_rate": 8.246933830210362e-06, "loss": 0.6577, "step": 9589 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.244970217332004e-06, "loss": 0.6647, "step": 9590 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.243006674271735e-06, "loss": 0.698, "step": 9591 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.24104320110767e-06, "loss": 0.6415, "step": 9592 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.239079797917921e-06, "loss": 0.6232, "step": 9593 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.237116464780592e-06, "loss": 0.5671, "step": 9594 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.235153201773794e-06, "loss": 0.6565, "step": 9595 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.233190008975624e-06, "loss": 0.5991, "step": 9596 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.231226886464182e-06, "loss": 0.7628, "step": 9597 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.229263834317565e-06, "loss": 0.7008, "step": 9598 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.227300852613863e-06, "loss": 0.6883, "step": 9599 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.225337941431171e-06, "loss": 0.5751, "step": 9600 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.223375100847577e-06, "loss": 0.6795, "step": 9601 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.221412330941163e-06, "loss": 0.6763, "step": 9602 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.219449631790019e-06, "loss": 0.7031, "step": 9603 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.217487003472213e-06, "loss": 0.6235, "step": 9604 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.215524446065831e-06, "loss": 0.6022, "step": 9605 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.213561959648941e-06, "loss": 0.6882, "step": 9606 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.211599544299616e-06, "loss": 0.6633, "step": 9607 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.209637200095926e-06, "loss": 0.6823, "step": 9608 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.207674927115936e-06, "loss": 0.5965, "step": 9609 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.205712725437706e-06, "loss": 0.5308, "step": 9610 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.203750595139304e-06, "loss": 0.6482, "step": 9611 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.201788536298774e-06, "loss": 0.5857, "step": 9612 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.199826548994178e-06, "loss": 0.6757, "step": 9613 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.197864633303566e-06, "loss": 0.76, "step": 9614 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.195902789304983e-06, "loss": 0.6207, "step": 9615 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.193941017076482e-06, "loss": 0.5893, "step": 9616 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.1919793166961e-06, "loss": 0.7387, "step": 9617 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.19001768824188e-06, "loss": 0.5892, "step": 9618 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.188056131791855e-06, "loss": 0.6068, "step": 9619 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.18609464742406e-06, "loss": 0.6361, "step": 9620 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.18413323521653e-06, "loss": 0.6445, "step": 9621 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.182171895247289e-06, "loss": 0.6484, "step": 9622 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.180210627594362e-06, "loss": 0.7546, "step": 9623 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.178249432335775e-06, "loss": 0.5797, "step": 9624 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.176288309549548e-06, "loss": 0.6131, "step": 9625 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.174327259313698e-06, "loss": 0.6736, "step": 9626 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.172366281706235e-06, "loss": 0.631, "step": 9627 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.17040537680517e-06, "loss": 0.5832, "step": 9628 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.168444544688516e-06, "loss": 0.5708, "step": 9629 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.166483785434274e-06, "loss": 0.5315, "step": 9630 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.16452309912045e-06, "loss": 0.5845, "step": 9631 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.162562485825039e-06, "loss": 0.5647, "step": 9632 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.160601945626041e-06, "loss": 0.6275, "step": 9633 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.158641478601453e-06, "loss": 0.7345, "step": 9634 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.156681084829256e-06, "loss": 0.6171, "step": 9635 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.154720764387446e-06, "loss": 0.61, "step": 9636 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.152760517354002e-06, "loss": 0.5424, "step": 9637 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.15080034380691e-06, "loss": 0.6166, "step": 9638 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.148840243824151e-06, "loss": 0.6119, "step": 9639 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.146880217483695e-06, "loss": 0.6155, "step": 9640 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.144920264863523e-06, "loss": 0.6519, "step": 9641 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.142960386041597e-06, "loss": 0.6636, "step": 9642 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.14100058109589e-06, "loss": 0.6323, "step": 9643 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.139040850104362e-06, "loss": 0.6479, "step": 9644 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 8.137081193144978e-06, "loss": 0.5153, "step": 9645 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.135121610295695e-06, "loss": 0.6063, "step": 9646 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.133162101634472e-06, "loss": 0.6473, "step": 9647 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.131202667239254e-06, "loss": 0.6034, "step": 9648 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.129243307188002e-06, "loss": 0.6751, "step": 9649 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.127284021558651e-06, "loss": 0.5766, "step": 9650 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.125324810429151e-06, "loss": 0.6371, "step": 9651 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.123365673877441e-06, "loss": 0.5712, "step": 9652 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.121406611981456e-06, "loss": 0.684, "step": 9653 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.11944762481914e-06, "loss": 0.6893, "step": 9654 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.117488712468416e-06, "loss": 0.5877, "step": 9655 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.11552987500722e-06, "loss": 0.6596, "step": 9656 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.11357111251347e-06, "loss": 0.5259, "step": 9657 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.111612425065094e-06, "loss": 0.7013, "step": 9658 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.10965381274001e-06, "loss": 0.6968, "step": 9659 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.107695275616135e-06, "loss": 0.5445, "step": 9660 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.105736813771383e-06, "loss": 0.6223, "step": 9661 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.103778427283668e-06, "loss": 0.6714, "step": 9662 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.101820116230893e-06, "loss": 0.6155, "step": 9663 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.09986188069097e-06, "loss": 0.6139, "step": 9664 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.097903720741794e-06, "loss": 0.5532, "step": 9665 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.095945636461264e-06, "loss": 0.5971, "step": 9666 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.093987627927283e-06, "loss": 0.6309, "step": 9667 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.092029695217735e-06, "loss": 0.571, "step": 9668 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.090071838410514e-06, "loss": 0.6521, "step": 9669 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.08811405758351e-06, "loss": 0.6474, "step": 9670 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.086156352814605e-06, "loss": 0.6423, "step": 9671 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.084198724181678e-06, "loss": 0.5952, "step": 9672 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.082241171762607e-06, "loss": 0.69, "step": 9673 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.080283695635267e-06, "loss": 0.5535, "step": 9674 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.078326295877529e-06, "loss": 0.5384, "step": 9675 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.076368972567267e-06, "loss": 0.6314, "step": 9676 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.074411725782341e-06, "loss": 0.5475, "step": 9677 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.072454555600611e-06, "loss": 0.6591, "step": 9678 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.070497462099948e-06, "loss": 0.6589, "step": 9679 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.068540445358198e-06, "loss": 0.6869, "step": 9680 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.066583505453217e-06, "loss": 0.62, "step": 9681 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.064626642462857e-06, "loss": 0.6114, "step": 9682 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.062669856464962e-06, "loss": 0.7681, "step": 9683 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.060713147537382e-06, "loss": 0.5957, "step": 9684 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.058756515757956e-06, "loss": 0.6157, "step": 9685 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.056799961204518e-06, "loss": 0.703, "step": 9686 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.054843483954913e-06, "loss": 0.5527, "step": 9687 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.05288708408696e-06, "loss": 0.6686, "step": 9688 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.050930761678496e-06, "loss": 0.6857, "step": 9689 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.048974516807347e-06, "loss": 0.6932, "step": 9690 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.04701834955133e-06, "loss": 0.6181, "step": 9691 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.04506225998827e-06, "loss": 0.6965, "step": 9692 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.043106248195984e-06, "loss": 0.6541, "step": 9693 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.041150314252286e-06, "loss": 0.5971, "step": 9694 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.039194458234983e-06, "loss": 0.6538, "step": 9695 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.037238680221879e-06, "loss": 0.6082, "step": 9696 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.035282980290786e-06, "loss": 0.5498, "step": 9697 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.033327358519502e-06, "loss": 0.638, "step": 9698 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.031371814985823e-06, "loss": 0.659, "step": 9699 }, { "epoch": 1.75, "grad_norm": 0.0, "learning_rate": 8.029416349767548e-06, "loss": 0.5291, "step": 9700 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.027460962942463e-06, "loss": 0.7585, "step": 9701 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.025505654588366e-06, "loss": 0.5565, "step": 9702 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.023550424783034e-06, "loss": 0.6379, "step": 9703 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.021595273604248e-06, "loss": 0.5136, "step": 9704 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.019640201129794e-06, "loss": 0.5129, "step": 9705 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.017685207437446e-06, "loss": 0.6983, "step": 9706 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.015730292604972e-06, "loss": 0.5624, "step": 9707 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.013775456710151e-06, "loss": 0.5619, "step": 9708 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.011820699830747e-06, "loss": 0.6438, "step": 9709 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.009866022044518e-06, "loss": 0.6356, "step": 9710 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.00791142342923e-06, "loss": 0.662, "step": 9711 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.005956904062634e-06, "loss": 0.6811, "step": 9712 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.00400246402249e-06, "loss": 0.5929, "step": 9713 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.00204810338655e-06, "loss": 0.656, "step": 9714 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 8.000093822232555e-06, "loss": 0.6502, "step": 9715 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.998139620638256e-06, "loss": 0.6465, "step": 9716 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.996185498681395e-06, "loss": 0.6289, "step": 9717 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.994231456439704e-06, "loss": 0.743, "step": 9718 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.992277493990924e-06, "loss": 0.52, "step": 9719 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.990323611412781e-06, "loss": 0.6128, "step": 9720 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.988369808783011e-06, "loss": 0.5943, "step": 9721 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.986416086179335e-06, "loss": 0.5972, "step": 9722 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.984462443679477e-06, "loss": 0.4835, "step": 9723 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.982508881361153e-06, "loss": 0.5506, "step": 9724 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.980555399302087e-06, "loss": 0.6029, "step": 9725 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.978601997579985e-06, "loss": 0.5764, "step": 9726 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.976648676272556e-06, "loss": 0.6155, "step": 9727 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.974695435457512e-06, "loss": 0.5917, "step": 9728 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.972742275212549e-06, "loss": 0.6146, "step": 9729 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.970789195615373e-06, "loss": 0.5873, "step": 9730 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.968836196743679e-06, "loss": 0.7765, "step": 9731 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.966883278675164e-06, "loss": 0.6503, "step": 9732 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.964930441487514e-06, "loss": 0.6778, "step": 9733 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.962977685258413e-06, "loss": 0.6216, "step": 9734 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.96102501006555e-06, "loss": 0.5843, "step": 9735 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.959072415986608e-06, "loss": 0.6997, "step": 9736 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.95711990309926e-06, "loss": 0.6088, "step": 9737 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.955167471481182e-06, "loss": 0.5991, "step": 9738 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.953215121210045e-06, "loss": 0.5698, "step": 9739 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.95126285236352e-06, "loss": 0.7443, "step": 9740 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.949310665019266e-06, "loss": 0.6453, "step": 9741 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.947358559254945e-06, "loss": 0.6032, "step": 9742 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.945406535148218e-06, "loss": 0.659, "step": 9743 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.94345459277674e-06, "loss": 0.7183, "step": 9744 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.941502732218157e-06, "loss": 0.6532, "step": 9745 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.939550953550126e-06, "loss": 0.6802, "step": 9746 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.937599256850289e-06, "loss": 0.6245, "step": 9747 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.935647642196285e-06, "loss": 0.5247, "step": 9748 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.933696109665754e-06, "loss": 0.5941, "step": 9749 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.931744659336329e-06, "loss": 0.5986, "step": 9750 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.929793291285648e-06, "loss": 0.6177, "step": 9751 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.927842005591334e-06, "loss": 0.6852, "step": 9752 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.925890802331015e-06, "loss": 0.5608, "step": 9753 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.923939681582314e-06, "loss": 0.7631, "step": 9754 }, { "epoch": 1.76, "grad_norm": 0.0, "learning_rate": 7.92198864342285e-06, "loss": 0.5774, "step": 9755 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.920037687930239e-06, "loss": 0.6989, "step": 9756 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.918086815182089e-06, "loss": 0.6123, "step": 9757 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.91613602525601e-06, "loss": 0.5948, "step": 9758 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.914185318229614e-06, "loss": 0.6493, "step": 9759 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.912234694180497e-06, "loss": 0.6398, "step": 9760 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.91028415318626e-06, "loss": 0.5944, "step": 9761 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.908333695324504e-06, "loss": 0.6473, "step": 9762 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.90638332067281e-06, "loss": 0.5701, "step": 9763 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.904433029308779e-06, "loss": 0.661, "step": 9764 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.90248282130999e-06, "loss": 0.7098, "step": 9765 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.900532696754026e-06, "loss": 0.569, "step": 9766 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.898582655718469e-06, "loss": 0.5626, "step": 9767 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.896632698280894e-06, "loss": 0.5233, "step": 9768 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.894682824518876e-06, "loss": 0.6371, "step": 9769 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.892733034509982e-06, "loss": 0.5859, "step": 9770 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.890783328331774e-06, "loss": 0.6394, "step": 9771 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.88883370606182e-06, "loss": 0.7056, "step": 9772 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.88688416777768e-06, "loss": 0.6961, "step": 9773 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.884934713556904e-06, "loss": 0.6281, "step": 9774 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.882985343477049e-06, "loss": 0.632, "step": 9775 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.881036057615665e-06, "loss": 0.6449, "step": 9776 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.879086856050298e-06, "loss": 0.6415, "step": 9777 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.877137738858489e-06, "loss": 0.6285, "step": 9778 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.875188706117777e-06, "loss": 0.6616, "step": 9779 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.873239757905695e-06, "loss": 0.6069, "step": 9780 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.87129089429978e-06, "loss": 0.5975, "step": 9781 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.869342115377562e-06, "loss": 0.6726, "step": 9782 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.867393421216562e-06, "loss": 0.6119, "step": 9783 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.865444811894307e-06, "loss": 0.613, "step": 9784 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.863496287488316e-06, "loss": 0.6569, "step": 9785 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.861547848076102e-06, "loss": 0.6975, "step": 9786 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.859599493735177e-06, "loss": 0.678, "step": 9787 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.857651224543047e-06, "loss": 0.5718, "step": 9788 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.855703040577227e-06, "loss": 0.6237, "step": 9789 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.85375494191521e-06, "loss": 0.7106, "step": 9790 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.851806928634498e-06, "loss": 0.5909, "step": 9791 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.849859000812588e-06, "loss": 0.6926, "step": 9792 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.847911158526973e-06, "loss": 0.633, "step": 9793 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.845963401855135e-06, "loss": 0.6131, "step": 9794 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.844015730874565e-06, "loss": 0.748, "step": 9795 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.84206814566274e-06, "loss": 0.5488, "step": 9796 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.840120646297143e-06, "loss": 0.7313, "step": 9797 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.838173232855246e-06, "loss": 0.6265, "step": 9798 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.836225905414518e-06, "loss": 0.6239, "step": 9799 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.83427866405244e-06, "loss": 0.5821, "step": 9800 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.832331508846459e-06, "loss": 0.5029, "step": 9801 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.830384439874045e-06, "loss": 0.4943, "step": 9802 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.828437457212655e-06, "loss": 0.6718, "step": 9803 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.82649056093974e-06, "loss": 0.618, "step": 9804 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.824543751132758e-06, "loss": 0.653, "step": 9805 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.822597027869151e-06, "loss": 0.6721, "step": 9806 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.820650391226364e-06, "loss": 0.6009, "step": 9807 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.818703841281844e-06, "loss": 0.6696, "step": 9808 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.816757378113014e-06, "loss": 0.6874, "step": 9809 }, { "epoch": 1.77, "grad_norm": 0.0, "learning_rate": 7.81481100179732e-06, "loss": 0.6166, "step": 9810 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.812864712412184e-06, "loss": 0.5904, "step": 9811 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.810918510035039e-06, "loss": 0.7513, "step": 9812 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.808972394743304e-06, "loss": 0.5333, "step": 9813 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.807026366614402e-06, "loss": 0.6144, "step": 9814 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.80508042572575e-06, "loss": 0.6617, "step": 9815 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.803134572154758e-06, "loss": 0.6307, "step": 9816 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.801188805978832e-06, "loss": 0.6094, "step": 9817 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.799243127275385e-06, "loss": 0.5371, "step": 9818 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.797297536121817e-06, "loss": 0.5517, "step": 9819 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.795352032595527e-06, "loss": 0.5393, "step": 9820 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.793406616773907e-06, "loss": 0.6632, "step": 9821 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.791461288734353e-06, "loss": 0.6529, "step": 9822 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.789516048554255e-06, "loss": 0.6117, "step": 9823 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.787570896310994e-06, "loss": 0.5815, "step": 9824 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.785625832081952e-06, "loss": 0.7202, "step": 9825 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.783680855944506e-06, "loss": 0.6494, "step": 9826 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.781735967976034e-06, "loss": 0.5447, "step": 9827 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.779791168253908e-06, "loss": 0.6681, "step": 9828 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.777846456855487e-06, "loss": 0.6981, "step": 9829 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.775901833858143e-06, "loss": 0.637, "step": 9830 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.773957299339239e-06, "loss": 0.7023, "step": 9831 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.772012853376124e-06, "loss": 0.5418, "step": 9832 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.770068496046154e-06, "loss": 0.5327, "step": 9833 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.768124227426676e-06, "loss": 0.6705, "step": 9834 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.766180047595043e-06, "loss": 0.6456, "step": 9835 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.764235956628596e-06, "loss": 0.5966, "step": 9836 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.762291954604668e-06, "loss": 0.7146, "step": 9837 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.760348041600607e-06, "loss": 0.5826, "step": 9838 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.758404217693731e-06, "loss": 0.5266, "step": 9839 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.756460482961376e-06, "loss": 0.6905, "step": 9840 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.754516837480869e-06, "loss": 0.5746, "step": 9841 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.752573281329526e-06, "loss": 0.5991, "step": 9842 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.75062981458467e-06, "loss": 0.5657, "step": 9843 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.748686437323613e-06, "loss": 0.7254, "step": 9844 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.746743149623663e-06, "loss": 0.6917, "step": 9845 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.744799951562139e-06, "loss": 0.5812, "step": 9846 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.742856843216328e-06, "loss": 0.4837, "step": 9847 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.740913824663544e-06, "loss": 0.7061, "step": 9848 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.738970895981073e-06, "loss": 0.5076, "step": 9849 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.737028057246214e-06, "loss": 0.6536, "step": 9850 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.73508530853626e-06, "loss": 0.6533, "step": 9851 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.733142649928488e-06, "loss": 0.6665, "step": 9852 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.731200081500189e-06, "loss": 0.6148, "step": 9853 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.729257603328636e-06, "loss": 0.6039, "step": 9854 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.727315215491102e-06, "loss": 0.6135, "step": 9855 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.725372918064863e-06, "loss": 0.6735, "step": 9856 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.723430711127189e-06, "loss": 0.5512, "step": 9857 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.72148859475534e-06, "loss": 0.68, "step": 9858 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.719546569026575e-06, "loss": 0.6505, "step": 9859 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.717604634018157e-06, "loss": 0.7015, "step": 9860 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.71566278980734e-06, "loss": 0.5914, "step": 9861 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.713721036471366e-06, "loss": 0.6107, "step": 9862 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.711779374087487e-06, "loss": 0.6465, "step": 9863 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.709837802732943e-06, "loss": 0.6126, "step": 9864 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.707896322484974e-06, "loss": 0.5155, "step": 9865 }, { "epoch": 1.78, "grad_norm": 0.0, "learning_rate": 7.705954933420819e-06, "loss": 0.674, "step": 9866 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.704013635617701e-06, "loss": 0.764, "step": 9867 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.702072429152863e-06, "loss": 0.5664, "step": 9868 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.700131314103514e-06, "loss": 0.5469, "step": 9869 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.698190290546883e-06, "loss": 0.5607, "step": 9870 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.696249358560185e-06, "loss": 0.6278, "step": 9871 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.694308518220633e-06, "loss": 0.6388, "step": 9872 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.692367769605438e-06, "loss": 0.5867, "step": 9873 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.690427112791807e-06, "loss": 0.5788, "step": 9874 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.688486547856942e-06, "loss": 0.6041, "step": 9875 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.686546074878045e-06, "loss": 0.5462, "step": 9876 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.684605693932306e-06, "loss": 0.6391, "step": 9877 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.682665405096917e-06, "loss": 0.5819, "step": 9878 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.68072520844907e-06, "loss": 0.6958, "step": 9879 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.678785104065945e-06, "loss": 0.6647, "step": 9880 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.676845092024728e-06, "loss": 0.7701, "step": 9881 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.674905172402591e-06, "loss": 0.6483, "step": 9882 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.67296534527671e-06, "loss": 0.5738, "step": 9883 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.671025610724258e-06, "loss": 0.6897, "step": 9884 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.669085968822392e-06, "loss": 0.5564, "step": 9885 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.66714641964828e-06, "loss": 0.6858, "step": 9886 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.66520696327908e-06, "loss": 0.6725, "step": 9887 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.663267599791946e-06, "loss": 0.6049, "step": 9888 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.66132832926403e-06, "loss": 0.709, "step": 9889 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.659389151772481e-06, "loss": 0.6782, "step": 9890 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.657450067394443e-06, "loss": 0.596, "step": 9891 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.655511076207053e-06, "loss": 0.6116, "step": 9892 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.653572178287444e-06, "loss": 0.6279, "step": 9893 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.651633373712759e-06, "loss": 0.5945, "step": 9894 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.64969466256012e-06, "loss": 0.6422, "step": 9895 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.64775604490665e-06, "loss": 0.6572, "step": 9896 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.645817520829479e-06, "loss": 0.746, "step": 9897 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.643879090405718e-06, "loss": 0.6524, "step": 9898 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.641940753712486e-06, "loss": 0.7045, "step": 9899 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.640002510826887e-06, "loss": 0.6457, "step": 9900 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.63806436182603e-06, "loss": 0.6116, "step": 9901 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.63612630678702e-06, "loss": 0.5483, "step": 9902 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.634188345786956e-06, "loss": 0.7363, "step": 9903 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.632250478902933e-06, "loss": 0.6985, "step": 9904 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.63031270621204e-06, "loss": 0.6563, "step": 9905 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.628375027791369e-06, "loss": 0.665, "step": 9906 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.626437443718002e-06, "loss": 0.5971, "step": 9907 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.62449995406902e-06, "loss": 0.5634, "step": 9908 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.622562558921498e-06, "loss": 0.6505, "step": 9909 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.620625258352508e-06, "loss": 0.5887, "step": 9910 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.618688052439125e-06, "loss": 0.6869, "step": 9911 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.61675094125841e-06, "loss": 0.6764, "step": 9912 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.614813924887423e-06, "loss": 0.5899, "step": 9913 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.612877003403231e-06, "loss": 0.6419, "step": 9914 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.610940176882874e-06, "loss": 0.6132, "step": 9915 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.609003445403411e-06, "loss": 0.5984, "step": 9916 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.607066809041887e-06, "loss": 0.5836, "step": 9917 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.605130267875345e-06, "loss": 0.6296, "step": 9918 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.603193821980823e-06, "loss": 0.7094, "step": 9919 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.601257471435358e-06, "loss": 0.5493, "step": 9920 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 7.599321216315982e-06, "loss": 0.6892, "step": 9921 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.597385056699719e-06, "loss": 0.633, "step": 9922 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.595448992663591e-06, "loss": 0.7103, "step": 9923 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.593513024284624e-06, "loss": 0.6146, "step": 9924 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.591577151639832e-06, "loss": 0.5608, "step": 9925 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.589641374806223e-06, "loss": 0.688, "step": 9926 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.587705693860813e-06, "loss": 0.5503, "step": 9927 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.585770108880602e-06, "loss": 0.6992, "step": 9928 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.583834619942595e-06, "loss": 0.6251, "step": 9929 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.581899227123784e-06, "loss": 0.6358, "step": 9930 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.579963930501161e-06, "loss": 0.5964, "step": 9931 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5780287301517205e-06, "loss": 0.5576, "step": 9932 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.576093626152448e-06, "loss": 0.7008, "step": 9933 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.57415861858032e-06, "loss": 0.6301, "step": 9934 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.572223707512319e-06, "loss": 0.5804, "step": 9935 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.570288893025419e-06, "loss": 0.5814, "step": 9936 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.568354175196592e-06, "loss": 0.5077, "step": 9937 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.566419554102798e-06, "loss": 0.6031, "step": 9938 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.564485029821003e-06, "loss": 0.7209, "step": 9939 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.562550602428166e-06, "loss": 0.6041, "step": 9940 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.560616272001242e-06, "loss": 0.5474, "step": 9941 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.55868203861718e-06, "loss": 0.6723, "step": 9942 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.55674790235293e-06, "loss": 0.5235, "step": 9943 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5548138632854375e-06, "loss": 0.6407, "step": 9944 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5528799214916345e-06, "loss": 0.7141, "step": 9945 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5509460770484615e-06, "loss": 0.6438, "step": 9946 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.549012330032847e-06, "loss": 0.676, "step": 9947 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.547078680521722e-06, "loss": 0.6355, "step": 9948 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.545145128592009e-06, "loss": 0.5822, "step": 9949 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.54321167432063e-06, "loss": 0.5972, "step": 9950 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.541278317784495e-06, "loss": 0.5841, "step": 9951 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5393450590605265e-06, "loss": 0.6461, "step": 9952 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.537411898225624e-06, "loss": 0.6453, "step": 9953 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.535478835356695e-06, "loss": 0.7222, "step": 9954 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.53354587053064e-06, "loss": 0.7102, "step": 9955 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.531613003824353e-06, "loss": 0.5879, "step": 9956 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.529680235314732e-06, "loss": 0.5888, "step": 9957 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.527747565078663e-06, "loss": 0.5916, "step": 9958 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5258149931930325e-06, "loss": 0.5756, "step": 9959 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.523882519734718e-06, "loss": 0.5591, "step": 9960 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.521950144780597e-06, "loss": 0.5611, "step": 9961 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5200178684075465e-06, "loss": 0.5801, "step": 9962 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.518085690692434e-06, "loss": 0.6256, "step": 9963 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5161536117121215e-06, "loss": 0.6277, "step": 9964 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.514221631543477e-06, "loss": 0.699, "step": 9965 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.512289750263353e-06, "loss": 0.6357, "step": 9966 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.510357967948607e-06, "loss": 0.6123, "step": 9967 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.508426284676084e-06, "loss": 0.6844, "step": 9968 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5064947005226285e-06, "loss": 0.7357, "step": 9969 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.504563215565088e-06, "loss": 0.6639, "step": 9970 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.5026318298802994e-06, "loss": 0.6115, "step": 9971 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.500700543545091e-06, "loss": 0.6137, "step": 9972 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.4987693566363e-06, "loss": 0.6721, "step": 9973 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.4968382692307485e-06, "loss": 0.6219, "step": 9974 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.494907281405259e-06, "loss": 0.629, "step": 9975 }, { "epoch": 1.8, "grad_norm": 0.0, "learning_rate": 7.492976393236649e-06, "loss": 0.6031, "step": 9976 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.49104560480173e-06, "loss": 0.6352, "step": 9977 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4891149161773165e-06, "loss": 0.5363, "step": 9978 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4871843274402145e-06, "loss": 0.6236, "step": 9979 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.485253838667221e-06, "loss": 0.7904, "step": 9980 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.483323449935141e-06, "loss": 0.6568, "step": 9981 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.481393161320768e-06, "loss": 0.6442, "step": 9982 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.479462972900886e-06, "loss": 0.5925, "step": 9983 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.477532884752287e-06, "loss": 0.6508, "step": 9984 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.475602896951748e-06, "loss": 0.653, "step": 9985 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.473673009576051e-06, "loss": 0.5819, "step": 9986 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.47174322270197e-06, "loss": 0.5832, "step": 9987 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.469813536406274e-06, "loss": 0.6472, "step": 9988 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.467883950765732e-06, "loss": 0.6957, "step": 9989 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4659544658571055e-06, "loss": 0.5506, "step": 9990 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.464025081757149e-06, "loss": 0.6526, "step": 9991 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.46209579854262e-06, "loss": 0.4985, "step": 9992 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.460166616290266e-06, "loss": 0.68, "step": 9993 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4582375350768355e-06, "loss": 0.6688, "step": 9994 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4563085549790705e-06, "loss": 0.7288, "step": 9995 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.454379676073709e-06, "loss": 0.5888, "step": 9996 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.452450898437489e-06, "loss": 0.5876, "step": 9997 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.450522222147129e-06, "loss": 0.5767, "step": 9998 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.448593647279367e-06, "loss": 0.5434, "step": 9999 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.446665173910919e-06, "loss": 0.6246, "step": 10000 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.444736802118505e-06, "loss": 0.6688, "step": 10001 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.442808531978837e-06, "loss": 0.5708, "step": 10002 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.440880363568628e-06, "loss": 0.5981, "step": 10003 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.438952296964581e-06, "loss": 0.628, "step": 10004 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.437024332243403e-06, "loss": 0.5812, "step": 10005 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.435096469481784e-06, "loss": 0.6341, "step": 10006 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.43316870875642e-06, "loss": 0.7238, "step": 10007 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.431241050144005e-06, "loss": 0.563, "step": 10008 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.42931349372122e-06, "loss": 0.5616, "step": 10009 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.427386039564746e-06, "loss": 0.6036, "step": 10010 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.425458687751266e-06, "loss": 0.6658, "step": 10011 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.42353143835745e-06, "loss": 0.6317, "step": 10012 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.421604291459966e-06, "loss": 0.5161, "step": 10013 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.419677247135481e-06, "loss": 0.6599, "step": 10014 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.417750305460651e-06, "loss": 0.5376, "step": 10015 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.415823466512142e-06, "loss": 0.6261, "step": 10016 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.413896730366602e-06, "loss": 0.6722, "step": 10017 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.411970097100678e-06, "loss": 0.6115, "step": 10018 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.410043566791019e-06, "loss": 0.6573, "step": 10019 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.408117139514266e-06, "loss": 0.5475, "step": 10020 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.406190815347052e-06, "loss": 0.6123, "step": 10021 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4042645943660085e-06, "loss": 0.5395, "step": 10022 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.4023384766477656e-06, "loss": 0.6344, "step": 10023 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.400412462268951e-06, "loss": 0.6539, "step": 10024 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.398486551306181e-06, "loss": 0.517, "step": 10025 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.39656074383607e-06, "loss": 0.6033, "step": 10026 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.3946350399352405e-06, "loss": 0.6522, "step": 10027 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.392709439680285e-06, "loss": 0.6662, "step": 10028 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.390783943147817e-06, "loss": 0.5875, "step": 10029 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.388858550414433e-06, "loss": 0.6883, "step": 10030 }, { "epoch": 1.81, "grad_norm": 0.0, "learning_rate": 7.386933261556727e-06, "loss": 0.6438, "step": 10031 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.385008076651294e-06, "loss": 0.5969, "step": 10032 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.383082995774721e-06, "loss": 0.5498, "step": 10033 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3811580190035855e-06, "loss": 0.7481, "step": 10034 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.379233146414477e-06, "loss": 0.6432, "step": 10035 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.377308378083958e-06, "loss": 0.7525, "step": 10036 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.375383714088608e-06, "loss": 0.6196, "step": 10037 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.373459154504987e-06, "loss": 0.715, "step": 10038 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.371534699409662e-06, "loss": 0.7215, "step": 10039 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.369610348879188e-06, "loss": 0.6158, "step": 10040 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.367686102990122e-06, "loss": 0.6416, "step": 10041 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.365761961819012e-06, "loss": 0.5794, "step": 10042 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.363837925442405e-06, "loss": 0.5304, "step": 10043 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.361913993936839e-06, "loss": 0.5407, "step": 10044 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.359990167378854e-06, "loss": 0.5586, "step": 10045 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.358066445844985e-06, "loss": 0.5984, "step": 10046 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.356142829411756e-06, "loss": 0.6675, "step": 10047 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.354219318155695e-06, "loss": 0.611, "step": 10048 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.352295912153323e-06, "loss": 0.6863, "step": 10049 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.350372611481156e-06, "loss": 0.6547, "step": 10050 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.348449416215705e-06, "loss": 0.6777, "step": 10051 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.346526326433478e-06, "loss": 0.6172, "step": 10052 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.344603342210978e-06, "loss": 0.6133, "step": 10053 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.342680463624707e-06, "loss": 0.6553, "step": 10054 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.34075769075116e-06, "loss": 0.5815, "step": 10055 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.338835023666826e-06, "loss": 0.7432, "step": 10056 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3369124624481945e-06, "loss": 0.6031, "step": 10057 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.33499000717175e-06, "loss": 0.5673, "step": 10058 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.333067657913967e-06, "loss": 0.6001, "step": 10059 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3311454147513205e-06, "loss": 0.5866, "step": 10060 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.32922327776028e-06, "loss": 0.6049, "step": 10061 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.327301247017314e-06, "loss": 0.6669, "step": 10062 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.325379322598885e-06, "loss": 0.6084, "step": 10063 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3234575045814435e-06, "loss": 0.6432, "step": 10064 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.321535793041455e-06, "loss": 0.661, "step": 10065 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.319614188055355e-06, "loss": 0.6533, "step": 10066 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.317692689699596e-06, "loss": 0.5842, "step": 10067 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.315771298050618e-06, "loss": 0.6448, "step": 10068 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.313850013184853e-06, "loss": 0.6298, "step": 10069 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.311928835178739e-06, "loss": 0.5257, "step": 10070 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3100077641087e-06, "loss": 0.6965, "step": 10071 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.30808680005116e-06, "loss": 0.6466, "step": 10072 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.306165943082543e-06, "loss": 0.5744, "step": 10073 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3042451932792534e-06, "loss": 0.5786, "step": 10074 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.3023245507177095e-06, "loss": 0.6082, "step": 10075 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.300404015474318e-06, "loss": 0.6834, "step": 10076 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.2984835876254775e-06, "loss": 0.627, "step": 10077 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.29656326724759e-06, "loss": 0.627, "step": 10078 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.294643054417045e-06, "loss": 0.6169, "step": 10079 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.2927229492102384e-06, "loss": 0.5895, "step": 10080 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.290802951703546e-06, "loss": 0.6296, "step": 10081 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.288883061973353e-06, "loss": 0.6583, "step": 10082 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.286963280096038e-06, "loss": 0.6424, "step": 10083 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.2850436061479704e-06, "loss": 0.6418, "step": 10084 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.283124040205518e-06, "loss": 0.6086, "step": 10085 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.281204582345047e-06, "loss": 0.5919, "step": 10086 }, { "epoch": 1.82, "grad_norm": 0.0, "learning_rate": 7.279285232642916e-06, "loss": 0.6415, "step": 10087 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.277365991175482e-06, "loss": 0.6589, "step": 10088 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.27544685801909e-06, "loss": 0.6228, "step": 10089 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.273527833250087e-06, "loss": 0.6274, "step": 10090 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.27160891694482e-06, "loss": 0.618, "step": 10091 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.269690109179625e-06, "loss": 0.697, "step": 10092 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.267771410030832e-06, "loss": 0.5966, "step": 10093 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.265852819574775e-06, "loss": 0.7538, "step": 10094 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.263934337887776e-06, "loss": 0.7032, "step": 10095 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.2620159650461585e-06, "loss": 0.6075, "step": 10096 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.260097701126234e-06, "loss": 0.5796, "step": 10097 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.258179546204318e-06, "loss": 0.6492, "step": 10098 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.2562615003567135e-06, "loss": 0.5921, "step": 10099 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.254343563659729e-06, "loss": 0.6122, "step": 10100 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.252425736189661e-06, "loss": 0.6458, "step": 10101 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.2505080180228025e-06, "loss": 0.6314, "step": 10102 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.248590409235452e-06, "loss": 0.6277, "step": 10103 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.24667290990388e-06, "loss": 0.6349, "step": 10104 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.244755520104381e-06, "loss": 0.6239, "step": 10105 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.242838239913226e-06, "loss": 0.6772, "step": 10106 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.240921069406688e-06, "loss": 0.6173, "step": 10107 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.239004008661037e-06, "loss": 0.6382, "step": 10108 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.237087057752537e-06, "loss": 0.5687, "step": 10109 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.235170216757446e-06, "loss": 0.6208, "step": 10110 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.233253485752025e-06, "loss": 0.629, "step": 10111 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.231336864812513e-06, "loss": 0.6102, "step": 10112 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.229420354015168e-06, "loss": 0.5964, "step": 10113 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.2275039534362254e-06, "loss": 0.5947, "step": 10114 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.225587663151925e-06, "loss": 0.6409, "step": 10115 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.223671483238502e-06, "loss": 0.6031, "step": 10116 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.221755413772182e-06, "loss": 0.6464, "step": 10117 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.219839454829194e-06, "loss": 0.5846, "step": 10118 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.2179236064857525e-06, "loss": 0.7135, "step": 10119 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.216007868818076e-06, "loss": 0.5732, "step": 10120 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.214092241902375e-06, "loss": 0.5782, "step": 10121 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.212176725814859e-06, "loss": 0.6308, "step": 10122 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.210261320631725e-06, "loss": 0.734, "step": 10123 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.208346026429178e-06, "loss": 0.6546, "step": 10124 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.206430843283407e-06, "loss": 0.6684, "step": 10125 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.2045157712706075e-06, "loss": 0.6939, "step": 10126 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.202600810466955e-06, "loss": 0.6099, "step": 10127 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.200685960948633e-06, "loss": 0.7331, "step": 10128 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.198771222791821e-06, "loss": 0.5349, "step": 10129 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.19685659607269e-06, "loss": 0.7118, "step": 10130 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.194942080867402e-06, "loss": 0.6252, "step": 10131 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.193027677252126e-06, "loss": 0.6063, "step": 10132 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.191113385303019e-06, "loss": 0.7049, "step": 10133 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.189199205096232e-06, "loss": 0.6342, "step": 10134 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.1872851367079155e-06, "loss": 0.6548, "step": 10135 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.185371180214214e-06, "loss": 0.5917, "step": 10136 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.1834573356912685e-06, "loss": 0.5535, "step": 10137 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.181543603215217e-06, "loss": 0.5836, "step": 10138 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.1796299828621884e-06, "loss": 0.6695, "step": 10139 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.177716474708309e-06, "loss": 0.5955, "step": 10140 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.175803078829706e-06, "loss": 0.538, "step": 10141 }, { "epoch": 1.83, "grad_norm": 0.0, "learning_rate": 7.173889795302494e-06, "loss": 0.5692, "step": 10142 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1719766242027865e-06, "loss": 0.6891, "step": 10143 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.170063565606694e-06, "loss": 0.7012, "step": 10144 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.168150619590318e-06, "loss": 0.5748, "step": 10145 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.166237786229765e-06, "loss": 0.7155, "step": 10146 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1643250656011265e-06, "loss": 0.5785, "step": 10147 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.162412457780492e-06, "loss": 0.6233, "step": 10148 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.160499962843959e-06, "loss": 0.6609, "step": 10149 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1585875808675945e-06, "loss": 0.7399, "step": 10150 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.156675311927485e-06, "loss": 0.6404, "step": 10151 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1547631560997045e-06, "loss": 0.6836, "step": 10152 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.152851113460315e-06, "loss": 0.7083, "step": 10153 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1509391840853905e-06, "loss": 0.5804, "step": 10154 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.149027368050984e-06, "loss": 0.5954, "step": 10155 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.147115665433155e-06, "loss": 0.7411, "step": 10156 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.145204076307951e-06, "loss": 0.5872, "step": 10157 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.143292600751417e-06, "loss": 0.6047, "step": 10158 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.141381238839598e-06, "loss": 0.5991, "step": 10159 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.139469990648532e-06, "loss": 0.5813, "step": 10160 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.137558856254248e-06, "loss": 0.6434, "step": 10161 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.135647835732777e-06, "loss": 0.595, "step": 10162 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.133736929160143e-06, "loss": 0.657, "step": 10163 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.131826136612367e-06, "loss": 0.6483, "step": 10164 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.129915458165457e-06, "loss": 0.7059, "step": 10165 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.128004893895426e-06, "loss": 0.6875, "step": 10166 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.126094443878282e-06, "loss": 0.6381, "step": 10167 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.124184108190024e-06, "loss": 0.6299, "step": 10168 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.122273886906648e-06, "loss": 0.6895, "step": 10169 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.120363780104147e-06, "loss": 0.6118, "step": 10170 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.11845378785851e-06, "loss": 0.6438, "step": 10171 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1165439102457164e-06, "loss": 0.5872, "step": 10172 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.1146341473417455e-06, "loss": 0.7121, "step": 10173 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.112724499222569e-06, "loss": 0.594, "step": 10174 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.110814965964159e-06, "loss": 0.5739, "step": 10175 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.108905547642482e-06, "loss": 0.5716, "step": 10176 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.106996244333491e-06, "loss": 0.5919, "step": 10177 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.105087056113147e-06, "loss": 0.6219, "step": 10178 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.103177983057401e-06, "loss": 0.6259, "step": 10179 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.101269025242197e-06, "loss": 0.6843, "step": 10180 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.0993601827434755e-06, "loss": 0.4911, "step": 10181 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.097451455637175e-06, "loss": 0.6634, "step": 10182 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.0955428439992276e-06, "loss": 0.7051, "step": 10183 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.093634347905562e-06, "loss": 0.5878, "step": 10184 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.091725967432101e-06, "loss": 0.6165, "step": 10185 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.089817702654766e-06, "loss": 0.6957, "step": 10186 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.087909553649465e-06, "loss": 0.6935, "step": 10187 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.086001520492111e-06, "loss": 0.5666, "step": 10188 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.084093603258608e-06, "loss": 0.6724, "step": 10189 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.082185802024859e-06, "loss": 0.6824, "step": 10190 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.080278116866753e-06, "loss": 0.6774, "step": 10191 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.078370547860188e-06, "loss": 0.5429, "step": 10192 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.076463095081048e-06, "loss": 0.7032, "step": 10193 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.0745557586052176e-06, "loss": 0.6162, "step": 10194 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.0726485385085666e-06, "loss": 0.5619, "step": 10195 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.0707414348669705e-06, "loss": 0.6341, "step": 10196 }, { "epoch": 1.84, "grad_norm": 0.0, "learning_rate": 7.068834447756299e-06, "loss": 0.4945, "step": 10197 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.066927577252416e-06, "loss": 0.5813, "step": 10198 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0650208234311744e-06, "loss": 0.597, "step": 10199 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.063114186368437e-06, "loss": 0.5037, "step": 10200 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0612076661400475e-06, "loss": 0.6627, "step": 10201 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.059301262821854e-06, "loss": 0.6849, "step": 10202 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.057394976489691e-06, "loss": 0.5349, "step": 10203 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.055488807219395e-06, "loss": 0.6463, "step": 10204 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0535827550868005e-06, "loss": 0.5589, "step": 10205 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.051676820167732e-06, "loss": 0.6966, "step": 10206 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0497710025380085e-06, "loss": 0.6204, "step": 10207 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.047865302273449e-06, "loss": 0.5671, "step": 10208 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.045959719449869e-06, "loss": 0.6072, "step": 10209 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.044054254143069e-06, "loss": 0.6902, "step": 10210 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.042148906428854e-06, "loss": 0.639, "step": 10211 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.04024367638302e-06, "loss": 0.6308, "step": 10212 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.038338564081367e-06, "loss": 0.7356, "step": 10213 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.036433569599679e-06, "loss": 0.5148, "step": 10214 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.034528693013738e-06, "loss": 0.5586, "step": 10215 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.032623934399328e-06, "loss": 0.5326, "step": 10216 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0307192938322224e-06, "loss": 0.5832, "step": 10217 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.028814771388188e-06, "loss": 0.6229, "step": 10218 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.026910367142994e-06, "loss": 0.6194, "step": 10219 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.025006081172394e-06, "loss": 0.6415, "step": 10220 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.023101913552153e-06, "loss": 0.679, "step": 10221 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.021197864358016e-06, "loss": 0.5753, "step": 10222 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0192939336657296e-06, "loss": 0.7036, "step": 10223 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.017390121551042e-06, "loss": 0.6487, "step": 10224 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.015486428089679e-06, "loss": 0.6144, "step": 10225 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.0135828533573814e-06, "loss": 0.4951, "step": 10226 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.011679397429873e-06, "loss": 0.5781, "step": 10227 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.009776060382877e-06, "loss": 0.5829, "step": 10228 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.007872842292113e-06, "loss": 0.5684, "step": 10229 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.005969743233293e-06, "loss": 0.63, "step": 10230 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.004066763282126e-06, "loss": 0.6946, "step": 10231 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.002163902514316e-06, "loss": 0.5317, "step": 10232 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 7.00026116100556e-06, "loss": 0.7088, "step": 10233 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.998358538831553e-06, "loss": 0.5421, "step": 10234 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.996456036067988e-06, "loss": 0.6306, "step": 10235 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.994553652790546e-06, "loss": 0.6304, "step": 10236 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.992651389074907e-06, "loss": 0.5803, "step": 10237 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.990749244996749e-06, "loss": 0.6164, "step": 10238 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.988847220631742e-06, "loss": 0.6846, "step": 10239 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.986945316055551e-06, "loss": 0.51, "step": 10240 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.985043531343836e-06, "loss": 0.669, "step": 10241 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.983141866572253e-06, "loss": 0.5313, "step": 10242 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.981240321816456e-06, "loss": 0.6297, "step": 10243 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.97933889715209e-06, "loss": 0.554, "step": 10244 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.977437592654797e-06, "loss": 0.6512, "step": 10245 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.975536408400214e-06, "loss": 0.6254, "step": 10246 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.973635344463978e-06, "loss": 0.609, "step": 10247 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.97173440092171e-06, "loss": 0.7496, "step": 10248 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.969833577849034e-06, "loss": 0.6063, "step": 10249 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.967932875321569e-06, "loss": 0.5975, "step": 10250 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.966032293414929e-06, "loss": 0.5424, "step": 10251 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.964131832204724e-06, "loss": 0.6073, "step": 10252 }, { "epoch": 1.85, "grad_norm": 0.0, "learning_rate": 6.962231491766551e-06, "loss": 0.6982, "step": 10253 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.960331272176016e-06, "loss": 0.6998, "step": 10254 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.958431173508713e-06, "loss": 0.6125, "step": 10255 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.956531195840226e-06, "loss": 0.6649, "step": 10256 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.954631339246141e-06, "loss": 0.5849, "step": 10257 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.952731603802037e-06, "loss": 0.571, "step": 10258 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.950831989583492e-06, "loss": 0.7057, "step": 10259 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.948932496666074e-06, "loss": 0.6255, "step": 10260 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.947033125125347e-06, "loss": 0.5554, "step": 10261 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.9451338750368755e-06, "loss": 0.6179, "step": 10262 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.943234746476208e-06, "loss": 0.5509, "step": 10263 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.941335739518901e-06, "loss": 0.6848, "step": 10264 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.939436854240498e-06, "loss": 0.6057, "step": 10265 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.937538090716536e-06, "loss": 0.7086, "step": 10266 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.935639449022559e-06, "loss": 0.6935, "step": 10267 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.933740929234094e-06, "loss": 0.6373, "step": 10268 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.9318425314266646e-06, "loss": 0.6492, "step": 10269 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.929944255675802e-06, "loss": 0.6964, "step": 10270 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.928046102057011e-06, "loss": 0.6228, "step": 10271 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.92614807064581e-06, "loss": 0.6513, "step": 10272 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.924250161517705e-06, "loss": 0.5949, "step": 10273 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.922352374748194e-06, "loss": 0.5998, "step": 10274 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.920454710412781e-06, "loss": 0.5392, "step": 10275 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.918557168586955e-06, "loss": 0.6246, "step": 10276 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.916659749346206e-06, "loss": 0.6749, "step": 10277 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.914762452766012e-06, "loss": 0.6683, "step": 10278 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.912865278921852e-06, "loss": 0.6451, "step": 10279 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.910968227889199e-06, "loss": 0.6385, "step": 10280 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.909071299743524e-06, "loss": 0.5128, "step": 10281 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.907174494560287e-06, "loss": 0.7201, "step": 10282 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.905277812414945e-06, "loss": 0.549, "step": 10283 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.903381253382955e-06, "loss": 0.5572, "step": 10284 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.901484817539765e-06, "loss": 0.7282, "step": 10285 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.899588504960817e-06, "loss": 0.6353, "step": 10286 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.897692315721547e-06, "loss": 0.5273, "step": 10287 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.895796249897391e-06, "loss": 0.661, "step": 10288 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.893900307563779e-06, "loss": 0.6002, "step": 10289 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.892004488796134e-06, "loss": 0.6119, "step": 10290 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.890108793669874e-06, "loss": 0.6502, "step": 10291 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.888213222260418e-06, "loss": 0.5801, "step": 10292 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.8863177746431664e-06, "loss": 0.7088, "step": 10293 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.884422450893529e-06, "loss": 0.7695, "step": 10294 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.8825272510869035e-06, "loss": 0.6406, "step": 10295 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.880632175298683e-06, "loss": 0.6097, "step": 10296 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.87873722360426e-06, "loss": 0.6496, "step": 10297 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.876842396079018e-06, "loss": 0.6091, "step": 10298 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.874947692798332e-06, "loss": 0.6119, "step": 10299 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.873053113837587e-06, "loss": 0.652, "step": 10300 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.871158659272141e-06, "loss": 0.6209, "step": 10301 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.869264329177365e-06, "loss": 0.6282, "step": 10302 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.867370123628616e-06, "loss": 0.69, "step": 10303 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.865476042701249e-06, "loss": 0.5885, "step": 10304 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.863582086470617e-06, "loss": 0.673, "step": 10305 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.861688255012062e-06, "loss": 0.5095, "step": 10306 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.859794548400922e-06, "loss": 0.5823, "step": 10307 }, { "epoch": 1.86, "grad_norm": 0.0, "learning_rate": 6.857900966712543e-06, "loss": 0.642, "step": 10308 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.856007510022238e-06, "loss": 0.5568, "step": 10309 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.854114178405342e-06, "loss": 0.6307, "step": 10310 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8522209719371735e-06, "loss": 0.6173, "step": 10311 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8503278906930445e-06, "loss": 0.5821, "step": 10312 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.848434934748272e-06, "loss": 0.57, "step": 10313 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8465421041781554e-06, "loss": 0.5898, "step": 10314 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.844649399057998e-06, "loss": 0.6529, "step": 10315 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.84275681946309e-06, "loss": 0.6886, "step": 10316 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8408643654687246e-06, "loss": 0.6076, "step": 10317 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.838972037150187e-06, "loss": 0.697, "step": 10318 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.837079834582756e-06, "loss": 0.6413, "step": 10319 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.83518775784171e-06, "loss": 0.534, "step": 10320 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.833295807002313e-06, "loss": 0.603, "step": 10321 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.831403982139836e-06, "loss": 0.7392, "step": 10322 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.829512283329539e-06, "loss": 0.739, "step": 10323 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.827620710646672e-06, "loss": 0.5891, "step": 10324 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8257292641664886e-06, "loss": 0.6613, "step": 10325 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.823837943964231e-06, "loss": 0.6499, "step": 10326 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8219467501151425e-06, "loss": 0.622, "step": 10327 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.820055682694456e-06, "loss": 0.5956, "step": 10328 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.818164741777401e-06, "loss": 0.5122, "step": 10329 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.81627392743921e-06, "loss": 0.5804, "step": 10330 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.814383239755088e-06, "loss": 0.756, "step": 10331 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.812492678800262e-06, "loss": 0.5669, "step": 10332 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.810602244649937e-06, "loss": 0.5935, "step": 10333 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.808711937379315e-06, "loss": 0.6317, "step": 10334 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.806821757063603e-06, "loss": 0.6406, "step": 10335 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.804931703777991e-06, "loss": 0.6328, "step": 10336 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.8030417775976675e-06, "loss": 0.6228, "step": 10337 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.801151978597825e-06, "loss": 0.7746, "step": 10338 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.799262306853631e-06, "loss": 0.6341, "step": 10339 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.797372762440266e-06, "loss": 0.5679, "step": 10340 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.7954833454329e-06, "loss": 0.537, "step": 10341 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.793594055906695e-06, "loss": 0.5708, "step": 10342 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.791704893936812e-06, "loss": 0.5979, "step": 10343 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.789815859598406e-06, "loss": 0.6113, "step": 10344 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.787926952966625e-06, "loss": 0.6004, "step": 10345 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.786038174116611e-06, "loss": 0.6503, "step": 10346 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.784149523123502e-06, "loss": 0.615, "step": 10347 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.782261000062436e-06, "loss": 0.6155, "step": 10348 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.78037260500854e-06, "loss": 0.5901, "step": 10349 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.778484338036935e-06, "loss": 0.5371, "step": 10350 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.7765961992227425e-06, "loss": 0.6233, "step": 10351 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.774708188641077e-06, "loss": 0.6145, "step": 10352 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.772820306367047e-06, "loss": 0.6089, "step": 10353 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.770932552475751e-06, "loss": 0.5608, "step": 10354 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.769044927042287e-06, "loss": 0.6904, "step": 10355 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.767157430141755e-06, "loss": 0.6086, "step": 10356 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.765270061849237e-06, "loss": 0.5099, "step": 10357 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.763382822239815e-06, "loss": 0.7073, "step": 10358 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.761495711388573e-06, "loss": 0.5605, "step": 10359 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.759608729370577e-06, "loss": 0.5233, "step": 10360 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.757721876260901e-06, "loss": 0.6087, "step": 10361 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.7558351521346e-06, "loss": 0.6537, "step": 10362 }, { "epoch": 1.87, "grad_norm": 0.0, "learning_rate": 6.7539485570667315e-06, "loss": 0.6163, "step": 10363 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.752062091132353e-06, "loss": 0.5848, "step": 10364 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.750175754406509e-06, "loss": 0.6579, "step": 10365 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.74828954696424e-06, "loss": 0.5801, "step": 10366 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.746403468880581e-06, "loss": 0.5775, "step": 10367 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.744517520230571e-06, "loss": 0.5621, "step": 10368 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.742631701089228e-06, "loss": 0.671, "step": 10369 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.740746011531576e-06, "loss": 0.562, "step": 10370 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.738860451632632e-06, "loss": 0.6574, "step": 10371 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.736975021467402e-06, "loss": 0.5997, "step": 10372 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.735089721110899e-06, "loss": 0.5135, "step": 10373 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.733204550638121e-06, "loss": 0.7029, "step": 10374 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.731319510124059e-06, "loss": 0.6135, "step": 10375 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.729434599643713e-06, "loss": 0.5674, "step": 10376 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.727549819272054e-06, "loss": 0.6698, "step": 10377 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.725665169084072e-06, "loss": 0.5829, "step": 10378 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.72378064915474e-06, "loss": 0.6699, "step": 10379 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.721896259559022e-06, "loss": 0.6001, "step": 10380 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.72001200037189e-06, "loss": 0.5098, "step": 10381 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.718127871668301e-06, "loss": 0.7277, "step": 10382 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.7162438735232076e-06, "loss": 0.639, "step": 10383 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.714360006011556e-06, "loss": 0.6148, "step": 10384 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.71247626920829e-06, "loss": 0.6833, "step": 10385 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.710592663188352e-06, "loss": 0.5936, "step": 10386 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.7087091880266745e-06, "loss": 0.5744, "step": 10387 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.706825843798179e-06, "loss": 0.6786, "step": 10388 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.704942630577797e-06, "loss": 0.533, "step": 10389 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.7030595484404415e-06, "loss": 0.6443, "step": 10390 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.701176597461027e-06, "loss": 0.772, "step": 10391 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.699293777714457e-06, "loss": 0.6556, "step": 10392 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.697411089275632e-06, "loss": 0.7055, "step": 10393 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.695528532219454e-06, "loss": 0.6163, "step": 10394 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.693646106620812e-06, "loss": 0.7139, "step": 10395 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.69176381255459e-06, "loss": 0.5222, "step": 10396 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.6898816500956735e-06, "loss": 0.6191, "step": 10397 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.6879996193189364e-06, "loss": 0.6284, "step": 10398 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.686117720299247e-06, "loss": 0.6135, "step": 10399 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.684235953111471e-06, "loss": 0.6383, "step": 10400 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.682354317830466e-06, "loss": 0.6334, "step": 10401 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.6804728145310936e-06, "loss": 0.6723, "step": 10402 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.678591443288199e-06, "loss": 0.5868, "step": 10403 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.676710204176624e-06, "loss": 0.7264, "step": 10404 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.674829097271214e-06, "loss": 0.5914, "step": 10405 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.6729481226467986e-06, "loss": 0.6895, "step": 10406 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.671067280378205e-06, "loss": 0.593, "step": 10407 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.669186570540258e-06, "loss": 0.6545, "step": 10408 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.6673059932077735e-06, "loss": 0.6748, "step": 10409 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.665425548455567e-06, "loss": 0.5899, "step": 10410 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.663545236358445e-06, "loss": 0.5156, "step": 10411 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.66166505699121e-06, "loss": 0.5694, "step": 10412 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.659785010428654e-06, "loss": 0.624, "step": 10413 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.657905096745578e-06, "loss": 0.6311, "step": 10414 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.656025316016761e-06, "loss": 0.5429, "step": 10415 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.654145668316985e-06, "loss": 0.7276, "step": 10416 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.652266153721025e-06, "loss": 0.619, "step": 10417 }, { "epoch": 1.88, "grad_norm": 0.0, "learning_rate": 6.650386772303652e-06, "loss": 0.621, "step": 10418 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.648507524139631e-06, "loss": 0.7754, "step": 10419 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.646628409303725e-06, "loss": 0.7573, "step": 10420 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.644749427870686e-06, "loss": 0.6386, "step": 10421 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.642870579915262e-06, "loss": 0.5515, "step": 10422 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.640991865512193e-06, "loss": 0.5903, "step": 10423 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.639113284736225e-06, "loss": 0.6402, "step": 10424 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.637234837662087e-06, "loss": 0.6051, "step": 10425 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.635356524364509e-06, "loss": 0.6327, "step": 10426 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.633478344918212e-06, "loss": 0.5046, "step": 10427 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.631600299397914e-06, "loss": 0.5059, "step": 10428 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.629722387878328e-06, "loss": 0.6337, "step": 10429 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.627844610434158e-06, "loss": 0.589, "step": 10430 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.625966967140104e-06, "loss": 0.668, "step": 10431 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.624089458070866e-06, "loss": 0.7179, "step": 10432 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.622212083301132e-06, "loss": 0.6311, "step": 10433 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.6203348429055866e-06, "loss": 0.574, "step": 10434 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.618457736958912e-06, "loss": 0.6898, "step": 10435 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.616580765535785e-06, "loss": 0.6187, "step": 10436 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.614703928710868e-06, "loss": 0.5735, "step": 10437 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.612827226558829e-06, "loss": 0.7177, "step": 10438 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.610950659154322e-06, "loss": 0.608, "step": 10439 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.609074226572007e-06, "loss": 0.6439, "step": 10440 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.607197928886528e-06, "loss": 0.7793, "step": 10441 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.605321766172524e-06, "loss": 0.6982, "step": 10442 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.603445738504639e-06, "loss": 0.6705, "step": 10443 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.601569845957503e-06, "loss": 0.5969, "step": 10444 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.599694088605739e-06, "loss": 0.5875, "step": 10445 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.597818466523968e-06, "loss": 0.6245, "step": 10446 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.595942979786805e-06, "loss": 0.5806, "step": 10447 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.594067628468863e-06, "loss": 0.7694, "step": 10448 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.592192412644746e-06, "loss": 0.6688, "step": 10449 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.590317332389052e-06, "loss": 0.8089, "step": 10450 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.588442387776381e-06, "loss": 0.5208, "step": 10451 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.58656757888131e-06, "loss": 0.6935, "step": 10452 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.584692905778429e-06, "loss": 0.6216, "step": 10453 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.582818368542317e-06, "loss": 0.7005, "step": 10454 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.580943967247542e-06, "loss": 0.6761, "step": 10455 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.579069701968673e-06, "loss": 0.5626, "step": 10456 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.577195572780273e-06, "loss": 0.7882, "step": 10457 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.575321579756897e-06, "loss": 0.648, "step": 10458 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.573447722973097e-06, "loss": 0.6363, "step": 10459 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.571574002503414e-06, "loss": 0.607, "step": 10460 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.56970041842239e-06, "loss": 0.7007, "step": 10461 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.56782697080456e-06, "loss": 0.5318, "step": 10462 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.565953659724455e-06, "loss": 0.6399, "step": 10463 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.564080485256592e-06, "loss": 0.618, "step": 10464 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.562207447475497e-06, "loss": 0.6706, "step": 10465 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.560334546455678e-06, "loss": 0.6838, "step": 10466 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.558461782271646e-06, "loss": 0.6982, "step": 10467 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.556589154997897e-06, "loss": 0.5686, "step": 10468 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.554716664708929e-06, "loss": 0.6479, "step": 10469 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.552844311479235e-06, "loss": 0.6076, "step": 10470 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.550972095383301e-06, "loss": 0.7054, "step": 10471 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.549100016495602e-06, "loss": 0.5631, "step": 10472 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.547228074890619e-06, "loss": 0.6918, "step": 10473 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 6.545356270642822e-06, "loss": 0.6655, "step": 10474 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.543484603826666e-06, "loss": 0.7422, "step": 10475 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.541613074516615e-06, "loss": 0.6512, "step": 10476 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.539741682787118e-06, "loss": 0.6424, "step": 10477 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.537870428712627e-06, "loss": 0.6857, "step": 10478 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.53599931236758e-06, "loss": 0.6506, "step": 10479 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.534128333826415e-06, "loss": 0.5521, "step": 10480 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.532257493163563e-06, "loss": 0.62, "step": 10481 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.530386790453449e-06, "loss": 0.6106, "step": 10482 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.528516225770492e-06, "loss": 0.6439, "step": 10483 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.526645799189106e-06, "loss": 0.6212, "step": 10484 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.524775510783699e-06, "loss": 0.5708, "step": 10485 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.522905360628676e-06, "loss": 0.7783, "step": 10486 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.521035348798436e-06, "loss": 0.5619, "step": 10487 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.519165475367366e-06, "loss": 0.6982, "step": 10488 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.517295740409864e-06, "loss": 0.6516, "step": 10489 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.515426144000297e-06, "loss": 0.5163, "step": 10490 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.5135566862130495e-06, "loss": 0.6728, "step": 10491 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.51168736712249e-06, "loss": 0.6728, "step": 10492 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.50981818680298e-06, "loss": 0.5001, "step": 10493 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.507949145328883e-06, "loss": 0.7245, "step": 10494 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.506080242774553e-06, "loss": 0.5549, "step": 10495 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.5042114792143325e-06, "loss": 0.5943, "step": 10496 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.502342854722576e-06, "loss": 0.658, "step": 10497 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.5004743693736046e-06, "loss": 0.66, "step": 10498 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.498606023241761e-06, "loss": 0.6486, "step": 10499 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.496737816401368e-06, "loss": 0.6496, "step": 10500 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.494869748926744e-06, "loss": 0.5578, "step": 10501 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.4930018208922075e-06, "loss": 0.6683, "step": 10502 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.491134032372066e-06, "loss": 0.6779, "step": 10503 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.489266383440627e-06, "loss": 0.7013, "step": 10504 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.487398874172184e-06, "loss": 0.6273, "step": 10505 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.485531504641027e-06, "loss": 0.6664, "step": 10506 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.483664274921451e-06, "loss": 0.6523, "step": 10507 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.481797185087733e-06, "loss": 0.5623, "step": 10508 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.479930235214151e-06, "loss": 0.5632, "step": 10509 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.478063425374972e-06, "loss": 0.6205, "step": 10510 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.4761967556444634e-06, "loss": 0.5938, "step": 10511 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.47433022609689e-06, "loss": 0.6135, "step": 10512 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.472463836806495e-06, "loss": 0.5006, "step": 10513 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.470597587847534e-06, "loss": 0.5838, "step": 10514 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.468731479294243e-06, "loss": 0.6017, "step": 10515 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.466865511220868e-06, "loss": 0.6051, "step": 10516 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.464999683701633e-06, "loss": 0.5942, "step": 10517 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.463133996810764e-06, "loss": 0.7279, "step": 10518 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.461268450622488e-06, "loss": 0.6673, "step": 10519 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.459403045211016e-06, "loss": 0.5198, "step": 10520 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.457537780650553e-06, "loss": 0.612, "step": 10521 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.4556726570153085e-06, "loss": 0.7401, "step": 10522 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.453807674379473e-06, "loss": 0.5664, "step": 10523 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.4519428328172466e-06, "loss": 0.7345, "step": 10524 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.450078132402811e-06, "loss": 0.6251, "step": 10525 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.448213573210347e-06, "loss": 0.6736, "step": 10526 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.446349155314039e-06, "loss": 0.6601, "step": 10527 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.4444848787880425e-06, "loss": 0.6473, "step": 10528 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 6.44262074370653e-06, "loss": 0.6563, "step": 10529 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.44075675014366e-06, "loss": 0.6065, "step": 10530 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.438892898173579e-06, "loss": 0.5591, "step": 10531 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.437029187870443e-06, "loss": 0.563, "step": 10532 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.43516561930839e-06, "loss": 0.5967, "step": 10533 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.4333021925615526e-06, "loss": 0.6677, "step": 10534 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.4314389077040705e-06, "loss": 0.5289, "step": 10535 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.429575764810056e-06, "loss": 0.5844, "step": 10536 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.4277127639536356e-06, "loss": 0.6362, "step": 10537 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.42584990520892e-06, "loss": 0.6512, "step": 10538 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.4239871886500185e-06, "loss": 0.6222, "step": 10539 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.422124614351033e-06, "loss": 0.7896, "step": 10540 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.420262182386061e-06, "loss": 0.5805, "step": 10541 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.418399892829192e-06, "loss": 0.7072, "step": 10542 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.416537745754509e-06, "loss": 0.5883, "step": 10543 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.414675741236094e-06, "loss": 0.6193, "step": 10544 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.412813879348019e-06, "loss": 0.5347, "step": 10545 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.410952160164354e-06, "loss": 0.7182, "step": 10546 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.409090583759159e-06, "loss": 0.6072, "step": 10547 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.407229150206494e-06, "loss": 0.7393, "step": 10548 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.405367859580408e-06, "loss": 0.6029, "step": 10549 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.403506711954948e-06, "loss": 0.606, "step": 10550 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.401645707404151e-06, "loss": 0.5359, "step": 10551 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.39978484600205e-06, "loss": 0.5998, "step": 10552 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.397924127822677e-06, "loss": 0.5924, "step": 10553 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.396063552940053e-06, "loss": 0.5828, "step": 10554 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.394203121428195e-06, "loss": 0.6513, "step": 10555 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.392342833361112e-06, "loss": 0.6221, "step": 10556 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.390482688812814e-06, "loss": 0.649, "step": 10557 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.388622687857298e-06, "loss": 0.5565, "step": 10558 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.386762830568556e-06, "loss": 0.6359, "step": 10559 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.38490311702058e-06, "loss": 0.688, "step": 10560 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.3830435472873485e-06, "loss": 0.6153, "step": 10561 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.381184121442843e-06, "loss": 0.5659, "step": 10562 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.3793248395610315e-06, "loss": 0.4987, "step": 10563 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.3774657017158796e-06, "loss": 0.6935, "step": 10564 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.375606707981354e-06, "loss": 0.6238, "step": 10565 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.373747858431396e-06, "loss": 0.6646, "step": 10566 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.371889153139963e-06, "loss": 0.6573, "step": 10567 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.370030592180993e-06, "loss": 0.682, "step": 10568 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.368172175628425e-06, "loss": 0.6994, "step": 10569 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.3663139035561894e-06, "loss": 0.6669, "step": 10570 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.364455776038212e-06, "loss": 0.5969, "step": 10571 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.362597793148411e-06, "loss": 0.6392, "step": 10572 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.360739954960706e-06, "loss": 0.5582, "step": 10573 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.358882261548994e-06, "loss": 0.5904, "step": 10574 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.357024712987186e-06, "loss": 0.6579, "step": 10575 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.355167309349175e-06, "loss": 0.6451, "step": 10576 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.353310050708849e-06, "loss": 0.5402, "step": 10577 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.351452937140101e-06, "loss": 0.5281, "step": 10578 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.349595968716804e-06, "loss": 0.606, "step": 10579 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.3477391455128355e-06, "loss": 0.5672, "step": 10580 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.345882467602058e-06, "loss": 0.6509, "step": 10581 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.344025935058333e-06, "loss": 0.5044, "step": 10582 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.342169547955523e-06, "loss": 0.7183, "step": 10583 }, { "epoch": 1.91, "grad_norm": 0.0, "learning_rate": 6.340313306367474e-06, "loss": 0.6634, "step": 10584 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.338457210368029e-06, "loss": 0.5972, "step": 10585 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.33660126003103e-06, "loss": 0.6313, "step": 10586 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.334745455430309e-06, "loss": 0.6008, "step": 10587 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.332889796639696e-06, "loss": 0.5672, "step": 10588 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.331034283733006e-06, "loss": 0.6787, "step": 10589 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.329178916784056e-06, "loss": 0.624, "step": 10590 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.327323695866658e-06, "loss": 0.5865, "step": 10591 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.325468621054616e-06, "loss": 0.4867, "step": 10592 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.323613692421724e-06, "loss": 0.6174, "step": 10593 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.3217589100417795e-06, "loss": 0.7414, "step": 10594 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.31990427398857e-06, "loss": 0.5879, "step": 10595 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.31804978433587e-06, "loss": 0.6026, "step": 10596 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.316195441157458e-06, "loss": 0.7732, "step": 10597 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.3143412445270975e-06, "loss": 0.7742, "step": 10598 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.31248719451856e-06, "loss": 0.5454, "step": 10599 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.310633291205599e-06, "loss": 0.5507, "step": 10600 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.3087795346619666e-06, "loss": 0.596, "step": 10601 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.3069259249614046e-06, "loss": 0.6199, "step": 10602 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.30507246217766e-06, "loss": 0.6125, "step": 10603 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.3032191463844605e-06, "loss": 0.625, "step": 10604 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.301365977655538e-06, "loss": 0.6217, "step": 10605 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.29951295606461e-06, "loss": 0.5853, "step": 10606 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.297660081685397e-06, "loss": 0.6829, "step": 10607 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.295807354591609e-06, "loss": 0.5844, "step": 10608 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.29395477485695e-06, "loss": 0.7307, "step": 10609 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.292102342555116e-06, "loss": 0.6043, "step": 10610 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.29025005775981e-06, "loss": 0.662, "step": 10611 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.288397920544706e-06, "loss": 0.7087, "step": 10612 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.286545930983492e-06, "loss": 0.5495, "step": 10613 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.284694089149843e-06, "loss": 0.6309, "step": 10614 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.282842395117426e-06, "loss": 0.62, "step": 10615 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.280990848959908e-06, "loss": 0.5286, "step": 10616 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.279139450750945e-06, "loss": 0.6038, "step": 10617 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.277288200564191e-06, "loss": 0.6794, "step": 10618 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.275437098473288e-06, "loss": 0.5777, "step": 10619 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.273586144551875e-06, "loss": 0.6866, "step": 10620 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.271735338873592e-06, "loss": 0.6288, "step": 10621 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.269884681512064e-06, "loss": 0.5353, "step": 10622 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.26803417254091e-06, "loss": 0.5706, "step": 10623 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.266183812033753e-06, "loss": 0.6447, "step": 10624 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.264333600064199e-06, "loss": 0.6347, "step": 10625 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.262483536705858e-06, "loss": 0.7286, "step": 10626 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.260633622032322e-06, "loss": 0.7008, "step": 10627 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.258783856117184e-06, "loss": 0.6107, "step": 10628 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.256934239034035e-06, "loss": 0.5731, "step": 10629 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.2550847708564546e-06, "loss": 0.5952, "step": 10630 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.2532354516580155e-06, "loss": 0.5834, "step": 10631 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.25138628151229e-06, "loss": 0.6273, "step": 10632 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.249537260492842e-06, "loss": 0.6438, "step": 10633 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.247688388673225e-06, "loss": 0.7068, "step": 10634 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.245839666126992e-06, "loss": 0.6436, "step": 10635 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.243991092927687e-06, "loss": 0.6779, "step": 10636 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.242142669148852e-06, "loss": 0.6545, "step": 10637 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.240294394864019e-06, "loss": 0.6625, "step": 10638 }, { "epoch": 1.92, "grad_norm": 0.0, "learning_rate": 6.238446270146714e-06, "loss": 0.5404, "step": 10639 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.236598295070462e-06, "loss": 0.6217, "step": 10640 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.234750469708779e-06, "loss": 0.6814, "step": 10641 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.232902794135172e-06, "loss": 0.6495, "step": 10642 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.231055268423143e-06, "loss": 0.5396, "step": 10643 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.229207892646192e-06, "loss": 0.5989, "step": 10644 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.227360666877811e-06, "loss": 0.7265, "step": 10645 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.225513591191485e-06, "loss": 0.5801, "step": 10646 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.223666665660697e-06, "loss": 0.5431, "step": 10647 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.221819890358919e-06, "loss": 0.5578, "step": 10648 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.219973265359613e-06, "loss": 0.6985, "step": 10649 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.218126790736249e-06, "loss": 0.5707, "step": 10650 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.216280466562281e-06, "loss": 0.6246, "step": 10651 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.214434292911156e-06, "loss": 0.627, "step": 10652 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.212588269856319e-06, "loss": 0.6127, "step": 10653 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.210742397471212e-06, "loss": 0.6635, "step": 10654 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.208896675829261e-06, "loss": 0.6087, "step": 10655 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.207051105003898e-06, "loss": 0.6332, "step": 10656 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.205205685068538e-06, "loss": 0.5967, "step": 10657 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.203360416096596e-06, "loss": 0.6499, "step": 10658 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.20151529816148e-06, "loss": 0.6258, "step": 10659 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.199670331336595e-06, "loss": 0.5795, "step": 10660 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.197825515695331e-06, "loss": 0.582, "step": 10661 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.1959808513110844e-06, "loss": 0.6835, "step": 10662 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.1941363382572354e-06, "loss": 0.5321, "step": 10663 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.192291976607166e-06, "loss": 0.5959, "step": 10664 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.19044776643424e-06, "loss": 0.5413, "step": 10665 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.188603707811829e-06, "loss": 0.6028, "step": 10666 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.186759800813291e-06, "loss": 0.5879, "step": 10667 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.184916045511983e-06, "loss": 0.7203, "step": 10668 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.1830724419812475e-06, "loss": 0.6174, "step": 10669 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.18122899029443e-06, "loss": 0.5931, "step": 10670 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.179385690524868e-06, "loss": 0.6634, "step": 10671 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.177542542745887e-06, "loss": 0.5985, "step": 10672 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.175699547030811e-06, "loss": 0.6431, "step": 10673 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.173856703452956e-06, "loss": 0.607, "step": 10674 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.172014012085638e-06, "loss": 0.6755, "step": 10675 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.1701714730021625e-06, "loss": 0.5529, "step": 10676 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.168329086275822e-06, "loss": 0.628, "step": 10677 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.166486851979918e-06, "loss": 0.5585, "step": 10678 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.164644770187735e-06, "loss": 0.5826, "step": 10679 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.162802840972553e-06, "loss": 0.7194, "step": 10680 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.160961064407646e-06, "loss": 0.6631, "step": 10681 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.159119440566283e-06, "loss": 0.6417, "step": 10682 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.157277969521731e-06, "loss": 0.6652, "step": 10683 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.155436651347243e-06, "loss": 0.5466, "step": 10684 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.1535954861160695e-06, "loss": 0.6717, "step": 10685 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.151754473901464e-06, "loss": 0.5231, "step": 10686 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.14991361477665e-06, "loss": 0.5519, "step": 10687 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.148072908814872e-06, "loss": 0.5205, "step": 10688 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.146232356089351e-06, "loss": 0.581, "step": 10689 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.144391956673308e-06, "loss": 0.6188, "step": 10690 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.142551710639959e-06, "loss": 0.5952, "step": 10691 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.140711618062512e-06, "loss": 0.6949, "step": 10692 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.138871679014167e-06, "loss": 0.6564, "step": 10693 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.137031893568124e-06, "loss": 0.7272, "step": 10694 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 6.1351922617975644e-06, "loss": 0.6458, "step": 10695 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.133352783775681e-06, "loss": 0.7746, "step": 10696 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.1315134595756464e-06, "loss": 0.6713, "step": 10697 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.129674289270635e-06, "loss": 0.6079, "step": 10698 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.127835272933809e-06, "loss": 0.6275, "step": 10699 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.125996410638329e-06, "loss": 0.5817, "step": 10700 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.124157702457352e-06, "loss": 0.5439, "step": 10701 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.122319148464018e-06, "loss": 0.663, "step": 10702 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.120480748731472e-06, "loss": 0.7359, "step": 10703 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.118642503332846e-06, "loss": 0.5374, "step": 10704 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.116804412341272e-06, "loss": 0.5684, "step": 10705 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.114966475829871e-06, "loss": 0.6458, "step": 10706 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.113128693871757e-06, "loss": 0.5515, "step": 10707 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.1112910665400435e-06, "loss": 0.7146, "step": 10708 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.109453593907836e-06, "loss": 0.7478, "step": 10709 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.1076162760482275e-06, "loss": 0.6428, "step": 10710 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.10577911303431e-06, "loss": 0.6015, "step": 10711 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.10394210493917e-06, "loss": 0.563, "step": 10712 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.102105251835888e-06, "loss": 0.6374, "step": 10713 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.1002685537975396e-06, "loss": 0.6443, "step": 10714 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.098432010897184e-06, "loss": 0.6996, "step": 10715 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.096595623207888e-06, "loss": 0.7394, "step": 10716 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.094759390802709e-06, "loss": 0.628, "step": 10717 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.092923313754689e-06, "loss": 0.5457, "step": 10718 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.091087392136873e-06, "loss": 0.619, "step": 10719 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.089251626022295e-06, "loss": 0.5498, "step": 10720 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0874160154839875e-06, "loss": 0.7119, "step": 10721 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.085580560594975e-06, "loss": 0.6526, "step": 10722 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.083745261428271e-06, "loss": 0.618, "step": 10723 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.081910118056895e-06, "loss": 0.5648, "step": 10724 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.080075130553842e-06, "loss": 0.6008, "step": 10725 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.078240298992115e-06, "loss": 0.5492, "step": 10726 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.07640562344471e-06, "loss": 0.514, "step": 10727 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.074571103984608e-06, "loss": 0.7396, "step": 10728 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0727367406847924e-06, "loss": 0.626, "step": 10729 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0709025336182396e-06, "loss": 0.6044, "step": 10730 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.06906848285791e-06, "loss": 0.7008, "step": 10731 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.067234588476778e-06, "loss": 0.6713, "step": 10732 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0654008505477846e-06, "loss": 0.5995, "step": 10733 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0635672691438885e-06, "loss": 0.6046, "step": 10734 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.06173384433803e-06, "loss": 0.5698, "step": 10735 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.059900576203144e-06, "loss": 0.6318, "step": 10736 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0580674648121625e-06, "loss": 0.6272, "step": 10737 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.056234510238013e-06, "loss": 0.6397, "step": 10738 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0544017125536125e-06, "loss": 0.5447, "step": 10739 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.052569071831867e-06, "loss": 0.5614, "step": 10740 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.050736588145687e-06, "loss": 0.5523, "step": 10741 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.048904261567969e-06, "loss": 0.5954, "step": 10742 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.047072092171611e-06, "loss": 0.6375, "step": 10743 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.045240080029498e-06, "loss": 0.6192, "step": 10744 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0434082252145044e-06, "loss": 0.5761, "step": 10745 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.041576527799513e-06, "loss": 0.7022, "step": 10746 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.03974498785739e-06, "loss": 0.6098, "step": 10747 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.037913605460995e-06, "loss": 0.6172, "step": 10748 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.0360823806831835e-06, "loss": 0.593, "step": 10749 }, { "epoch": 1.94, "grad_norm": 0.0, "learning_rate": 6.034251313596804e-06, "loss": 0.6169, "step": 10750 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.032420404274701e-06, "loss": 0.6207, "step": 10751 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.030589652789713e-06, "loss": 0.5697, "step": 10752 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.028759059214666e-06, "loss": 0.69, "step": 10753 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.026928623622392e-06, "loss": 0.5723, "step": 10754 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.025098346085699e-06, "loss": 0.6168, "step": 10755 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.023268226677405e-06, "loss": 0.6766, "step": 10756 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.021438265470313e-06, "loss": 0.6374, "step": 10757 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.019608462537221e-06, "loss": 0.6803, "step": 10758 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.017778817950923e-06, "loss": 0.7656, "step": 10759 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.0159493317842074e-06, "loss": 0.6039, "step": 10760 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.0141200041098516e-06, "loss": 0.6595, "step": 10761 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.0122908350006335e-06, "loss": 0.6099, "step": 10762 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.010461824529313e-06, "loss": 0.6049, "step": 10763 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.008632972768658e-06, "loss": 0.6658, "step": 10764 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.006804279791419e-06, "loss": 0.5249, "step": 10765 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.004975745670346e-06, "loss": 0.619, "step": 10766 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.003147370478184e-06, "loss": 0.6418, "step": 10767 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 6.001319154287666e-06, "loss": 0.6715, "step": 10768 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.999491097171519e-06, "loss": 0.605, "step": 10769 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.997663199202477e-06, "loss": 0.6544, "step": 10770 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.995835460453243e-06, "loss": 0.606, "step": 10771 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9940078809965354e-06, "loss": 0.5775, "step": 10772 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.992180460905057e-06, "loss": 0.5778, "step": 10773 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.990353200251504e-06, "loss": 0.726, "step": 10774 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.988526099108571e-06, "loss": 0.6181, "step": 10775 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9866991575489434e-06, "loss": 0.7373, "step": 10776 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.984872375645299e-06, "loss": 0.6492, "step": 10777 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.983045753470308e-06, "loss": 0.6002, "step": 10778 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.981219291096636e-06, "loss": 0.6976, "step": 10779 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9793929885969484e-06, "loss": 0.7024, "step": 10780 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.977566846043894e-06, "loss": 0.6143, "step": 10781 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.975740863510122e-06, "loss": 0.6008, "step": 10782 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.97391504106827e-06, "loss": 0.6011, "step": 10783 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.972089378790977e-06, "loss": 0.5695, "step": 10784 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.970263876750871e-06, "loss": 0.6908, "step": 10785 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.96843853502057e-06, "loss": 0.6399, "step": 10786 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.96661335367269e-06, "loss": 0.5895, "step": 10787 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9647883327798375e-06, "loss": 0.576, "step": 10788 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.96296347241462e-06, "loss": 0.6926, "step": 10789 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.961138772649632e-06, "loss": 0.602, "step": 10790 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9593142335574605e-06, "loss": 0.7394, "step": 10791 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.957489855210696e-06, "loss": 0.7087, "step": 10792 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.955665637681904e-06, "loss": 0.7394, "step": 10793 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.953841581043664e-06, "loss": 0.5759, "step": 10794 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9520176853685386e-06, "loss": 0.7225, "step": 10795 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.950193950729082e-06, "loss": 0.6506, "step": 10796 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9483703771978475e-06, "loss": 0.5475, "step": 10797 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.946546964847381e-06, "loss": 0.5844, "step": 10798 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.9447237137502204e-06, "loss": 0.6779, "step": 10799 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.942900623978902e-06, "loss": 0.5969, "step": 10800 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.941077695605941e-06, "loss": 0.5664, "step": 10801 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.939254928703864e-06, "loss": 0.5688, "step": 10802 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.937432323345183e-06, "loss": 0.5982, "step": 10803 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.935609879602402e-06, "loss": 0.5282, "step": 10804 }, { "epoch": 1.95, "grad_norm": 0.0, "learning_rate": 5.933787597548024e-06, "loss": 0.6357, "step": 10805 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.931965477254542e-06, "loss": 0.7861, "step": 10806 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.930143518794442e-06, "loss": 0.6443, "step": 10807 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.928321722240205e-06, "loss": 0.6382, "step": 10808 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.926500087664303e-06, "loss": 0.6473, "step": 10809 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.9246786151392076e-06, "loss": 0.6456, "step": 10810 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.922857304737378e-06, "loss": 0.7615, "step": 10811 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.9210361565312675e-06, "loss": 0.5497, "step": 10812 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.9192151705933286e-06, "loss": 0.6144, "step": 10813 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.917394346996001e-06, "loss": 0.5526, "step": 10814 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.915573685811722e-06, "loss": 0.6498, "step": 10815 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.913753187112916e-06, "loss": 0.5741, "step": 10816 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.911932850972007e-06, "loss": 0.6259, "step": 10817 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.910112677461415e-06, "loss": 0.6227, "step": 10818 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.908292666653547e-06, "loss": 0.7545, "step": 10819 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.906472818620803e-06, "loss": 0.5881, "step": 10820 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.904653133435586e-06, "loss": 0.6753, "step": 10821 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.902833611170282e-06, "loss": 0.6667, "step": 10822 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.901014251897279e-06, "loss": 0.6053, "step": 10823 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.899195055688948e-06, "loss": 0.5605, "step": 10824 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8973760226176625e-06, "loss": 0.5379, "step": 10825 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.895557152755787e-06, "loss": 0.6007, "step": 10826 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8937384461756805e-06, "loss": 0.601, "step": 10827 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.891919902949692e-06, "loss": 0.659, "step": 10828 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8901015231501665e-06, "loss": 0.603, "step": 10829 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.888283306849446e-06, "loss": 0.6457, "step": 10830 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8864652541198575e-06, "loss": 0.5717, "step": 10831 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.884647365033729e-06, "loss": 0.6003, "step": 10832 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.882829639663377e-06, "loss": 0.7718, "step": 10833 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8810120780811145e-06, "loss": 0.5237, "step": 10834 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8791946803592484e-06, "loss": 0.5844, "step": 10835 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.877377446570077e-06, "loss": 0.59, "step": 10836 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.875560376785892e-06, "loss": 0.6276, "step": 10837 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8737434710789875e-06, "loss": 0.6623, "step": 10838 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.871926729521629e-06, "loss": 0.6523, "step": 10839 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.870110152186099e-06, "loss": 0.7524, "step": 10840 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8682937391446635e-06, "loss": 0.6862, "step": 10841 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.866477490469577e-06, "loss": 0.5521, "step": 10842 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.864661406233101e-06, "loss": 0.7632, "step": 10843 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.862845486507478e-06, "loss": 0.7514, "step": 10844 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.861029731364951e-06, "loss": 0.5979, "step": 10845 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8592141408777495e-06, "loss": 0.6173, "step": 10846 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8573987151181e-06, "loss": 0.6546, "step": 10847 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.855583454158229e-06, "loss": 0.6189, "step": 10848 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.853768358070349e-06, "loss": 0.609, "step": 10849 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.851953426926663e-06, "loss": 0.5914, "step": 10850 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.850138660799378e-06, "loss": 0.6211, "step": 10851 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.848324059760687e-06, "loss": 0.6485, "step": 10852 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.846509623882779e-06, "loss": 0.6369, "step": 10853 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8446953532378304e-06, "loss": 0.4938, "step": 10854 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.842881247898018e-06, "loss": 0.55, "step": 10855 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.841067307935513e-06, "loss": 0.6983, "step": 10856 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.8392535334224755e-06, "loss": 0.5983, "step": 10857 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.837439924431057e-06, "loss": 0.7637, "step": 10858 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.835626481033413e-06, "loss": 0.7116, "step": 10859 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.833813203301684e-06, "loss": 0.6223, "step": 10860 }, { "epoch": 1.96, "grad_norm": 0.0, "learning_rate": 5.832000091308e-06, "loss": 0.6013, "step": 10861 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.8301871451244875e-06, "loss": 0.7284, "step": 10862 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.8283743648232785e-06, "loss": 0.6744, "step": 10863 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.826561750476483e-06, "loss": 0.6282, "step": 10864 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.8247493021562105e-06, "loss": 0.5499, "step": 10865 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.822937019934564e-06, "loss": 0.6056, "step": 10866 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.821124903883638e-06, "loss": 0.6603, "step": 10867 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.819312954075522e-06, "loss": 0.5641, "step": 10868 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.817501170582299e-06, "loss": 0.6169, "step": 10869 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.815689553476045e-06, "loss": 0.7773, "step": 10870 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.813878102828827e-06, "loss": 0.6565, "step": 10871 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.812066818712712e-06, "loss": 0.6612, "step": 10872 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.8102557011997475e-06, "loss": 0.5486, "step": 10873 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.808444750361992e-06, "loss": 0.6564, "step": 10874 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.806633966271485e-06, "loss": 0.5242, "step": 10875 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.804823349000268e-06, "loss": 0.6694, "step": 10876 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.8030128986203595e-06, "loss": 0.6961, "step": 10877 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.801202615203785e-06, "loss": 0.5868, "step": 10878 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.799392498822566e-06, "loss": 0.5041, "step": 10879 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.79758254954871e-06, "loss": 0.6038, "step": 10880 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.795772767454219e-06, "loss": 0.5817, "step": 10881 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.79396315261109e-06, "loss": 0.5941, "step": 10882 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.792153705091312e-06, "loss": 0.5866, "step": 10883 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.790344424966869e-06, "loss": 0.6539, "step": 10884 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.788535312309735e-06, "loss": 0.6382, "step": 10885 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.786726367191881e-06, "loss": 0.5957, "step": 10886 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.7849175896852705e-06, "loss": 0.5662, "step": 10887 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.783108979861859e-06, "loss": 0.5809, "step": 10888 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.7813005377935925e-06, "loss": 0.6474, "step": 10889 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.779492263552421e-06, "loss": 0.5313, "step": 10890 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.777684157210282e-06, "loss": 0.671, "step": 10891 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.775876218839096e-06, "loss": 0.5908, "step": 10892 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.77406844851079e-06, "loss": 0.625, "step": 10893 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.7722608462972775e-06, "loss": 0.6013, "step": 10894 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.770453412270474e-06, "loss": 0.6056, "step": 10895 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.768646146502279e-06, "loss": 0.6225, "step": 10896 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.76683904906459e-06, "loss": 0.622, "step": 10897 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.765032120029299e-06, "loss": 0.5491, "step": 10898 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.763225359468278e-06, "loss": 0.7265, "step": 10899 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.761418767453413e-06, "loss": 0.5086, "step": 10900 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.759612344056571e-06, "loss": 0.5627, "step": 10901 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.757806089349616e-06, "loss": 0.6975, "step": 10902 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.7560000034043995e-06, "loss": 0.4627, "step": 10903 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.754194086292775e-06, "loss": 0.6568, "step": 10904 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.752388338086581e-06, "loss": 0.7636, "step": 10905 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.750582758857656e-06, "loss": 0.5928, "step": 10906 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.74877734867783e-06, "loss": 0.6933, "step": 10907 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.7469721076189235e-06, "loss": 0.6075, "step": 10908 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.745167035752752e-06, "loss": 0.6657, "step": 10909 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.743362133151125e-06, "loss": 0.572, "step": 10910 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.74155739988584e-06, "loss": 0.5309, "step": 10911 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.7397528360287e-06, "loss": 0.5958, "step": 10912 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.737948441651495e-06, "loss": 0.5556, "step": 10913 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.736144216826e-06, "loss": 0.6453, "step": 10914 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.73434016162399e-06, "loss": 0.6726, "step": 10915 }, { "epoch": 1.97, "grad_norm": 0.0, "learning_rate": 5.732536276117233e-06, "loss": 0.5779, "step": 10916 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.730732560377498e-06, "loss": 0.6707, "step": 10917 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.728929014476534e-06, "loss": 0.5109, "step": 10918 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.727125638486092e-06, "loss": 0.5905, "step": 10919 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.725322432477911e-06, "loss": 0.569, "step": 10920 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.723519396523726e-06, "loss": 0.6295, "step": 10921 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.721716530695267e-06, "loss": 0.67, "step": 10922 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.719913835064252e-06, "loss": 0.5869, "step": 10923 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.7181113097023985e-06, "loss": 0.631, "step": 10924 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.716308954681411e-06, "loss": 0.7146, "step": 10925 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.714506770072993e-06, "loss": 0.6475, "step": 10926 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.712704755948833e-06, "loss": 0.5504, "step": 10927 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.710902912380625e-06, "loss": 0.6453, "step": 10928 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.7091012394400515e-06, "loss": 0.6545, "step": 10929 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.707299737198777e-06, "loss": 0.5599, "step": 10930 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.705498405728474e-06, "loss": 0.6476, "step": 10931 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.7036972451007946e-06, "loss": 0.5821, "step": 10932 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.701896255387405e-06, "loss": 0.6485, "step": 10933 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.700095436659945e-06, "loss": 0.6645, "step": 10934 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.698294788990056e-06, "loss": 0.687, "step": 10935 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.696494312449371e-06, "loss": 0.5555, "step": 10936 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.694694007109507e-06, "loss": 0.6213, "step": 10937 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.692893873042096e-06, "loss": 0.6462, "step": 10938 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.691093910318746e-06, "loss": 0.6786, "step": 10939 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.689294119011059e-06, "loss": 0.6266, "step": 10940 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.687494499190638e-06, "loss": 0.5699, "step": 10941 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.685695050929074e-06, "loss": 0.5961, "step": 10942 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.6838957742979515e-06, "loss": 0.5788, "step": 10943 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.682096669368848e-06, "loss": 0.659, "step": 10944 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.680297736213338e-06, "loss": 0.679, "step": 10945 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.678498974902983e-06, "loss": 0.5815, "step": 10946 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.6767003855093415e-06, "loss": 0.6087, "step": 10947 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.674901968103965e-06, "loss": 0.6232, "step": 10948 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.673103722758394e-06, "loss": 0.5369, "step": 10949 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.671305649544173e-06, "loss": 0.6849, "step": 10950 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.669507748532833e-06, "loss": 0.5898, "step": 10951 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.667710019795889e-06, "loss": 0.6003, "step": 10952 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.6659124634048625e-06, "loss": 0.6921, "step": 10953 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.664115079431258e-06, "loss": 0.5744, "step": 10954 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.662317867946589e-06, "loss": 0.5886, "step": 10955 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.660520829022344e-06, "loss": 0.5869, "step": 10956 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.658723962730016e-06, "loss": 0.6406, "step": 10957 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.656927269141086e-06, "loss": 0.573, "step": 10958 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.655130748327028e-06, "loss": 0.5864, "step": 10959 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.653334400359311e-06, "loss": 0.6383, "step": 10960 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.651538225309401e-06, "loss": 0.6766, "step": 10961 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.649742223248747e-06, "loss": 0.6716, "step": 10962 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.6479463942488025e-06, "loss": 0.5939, "step": 10963 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.646150738381003e-06, "loss": 0.627, "step": 10964 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.644355255716783e-06, "loss": 0.6966, "step": 10965 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.642559946327581e-06, "loss": 0.501, "step": 10966 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.640764810284804e-06, "loss": 0.5104, "step": 10967 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.638969847659872e-06, "loss": 0.6011, "step": 10968 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.637175058524189e-06, "loss": 0.5837, "step": 10969 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.635380442949152e-06, "loss": 0.6271, "step": 10970 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 5.633586001006164e-06, "loss": 0.5613, "step": 10971 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.631791732766603e-06, "loss": 0.6603, "step": 10972 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.62999763830185e-06, "loss": 0.6673, "step": 10973 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.628203717683282e-06, "loss": 0.5958, "step": 10974 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.626409970982252e-06, "loss": 0.6295, "step": 10975 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.624616398270129e-06, "loss": 0.5871, "step": 10976 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.62282299961826e-06, "loss": 0.7977, "step": 10977 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.621029775097992e-06, "loss": 0.6247, "step": 10978 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.6192367247806604e-06, "loss": 0.6053, "step": 10979 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.617443848737596e-06, "loss": 0.607, "step": 10980 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.615651147040121e-06, "loss": 0.684, "step": 10981 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.61385861975956e-06, "loss": 0.5594, "step": 10982 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.612066266967215e-06, "loss": 0.6174, "step": 10983 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.61027408873439e-06, "loss": 0.7116, "step": 10984 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.608482085132382e-06, "loss": 0.6979, "step": 10985 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.606690256232474e-06, "loss": 0.54, "step": 10986 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.6048986021059606e-06, "loss": 0.5816, "step": 10987 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.603107122824109e-06, "loss": 0.5641, "step": 10988 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.60131581845819e-06, "loss": 0.5713, "step": 10989 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.599524689079463e-06, "loss": 0.4696, "step": 10990 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5977337347591765e-06, "loss": 0.5744, "step": 10991 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.595942955568587e-06, "loss": 0.5619, "step": 10992 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.594152351578932e-06, "loss": 0.6326, "step": 10993 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.592361922861443e-06, "loss": 0.7078, "step": 10994 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.590571669487348e-06, "loss": 0.518, "step": 10995 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.588781591527866e-06, "loss": 0.6259, "step": 10996 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.58699168905421e-06, "loss": 0.5369, "step": 10997 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.585201962137583e-06, "loss": 0.5373, "step": 10998 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.583412410849185e-06, "loss": 0.6105, "step": 10999 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.581623035260208e-06, "loss": 0.6173, "step": 11000 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5798338354418365e-06, "loss": 0.6983, "step": 11001 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.578044811465246e-06, "loss": 0.5222, "step": 11002 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5762559634016045e-06, "loss": 0.6818, "step": 11003 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5744672913220874e-06, "loss": 0.6076, "step": 11004 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.572678795297838e-06, "loss": 0.6879, "step": 11005 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.57089047540001e-06, "loss": 0.581, "step": 11006 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.569102331699748e-06, "loss": 0.6833, "step": 11007 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.567314364268182e-06, "loss": 0.6781, "step": 11008 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.565526573176447e-06, "loss": 0.6157, "step": 11009 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5637389584956615e-06, "loss": 0.6456, "step": 11010 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.56195152029694e-06, "loss": 0.6742, "step": 11011 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.560164258651395e-06, "loss": 0.5959, "step": 11012 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.558377173630113e-06, "loss": 0.5618, "step": 11013 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5565902653042e-06, "loss": 0.6539, "step": 11014 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.554803533744737e-06, "loss": 0.6207, "step": 11015 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.553016979022806e-06, "loss": 0.5763, "step": 11016 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.551230601209478e-06, "loss": 0.5286, "step": 11017 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5494444003758165e-06, "loss": 0.6159, "step": 11018 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.547658376592882e-06, "loss": 0.5928, "step": 11019 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.545872529931724e-06, "loss": 0.6735, "step": 11020 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.5440868604633865e-06, "loss": 0.6736, "step": 11021 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.542301368258908e-06, "loss": 0.63, "step": 11022 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.540516053389318e-06, "loss": 0.6312, "step": 11023 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.538730915925634e-06, "loss": 0.6353, "step": 11024 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.53694595593888e-06, "loss": 0.6033, "step": 11025 }, { "epoch": 1.99, "grad_norm": 0.0, "learning_rate": 5.535161173500063e-06, "loss": 0.5571, "step": 11026 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.533376568680187e-06, "loss": 0.6481, "step": 11027 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.531592141550238e-06, "loss": 0.6484, "step": 11028 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.529807892181205e-06, "loss": 0.6369, "step": 11029 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.528023820644076e-06, "loss": 0.6486, "step": 11030 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.52623992700982e-06, "loss": 0.6596, "step": 11031 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.524456211349405e-06, "loss": 0.6635, "step": 11032 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.522672673733786e-06, "loss": 0.6972, "step": 11033 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.520889314233922e-06, "loss": 0.6889, "step": 11034 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.519106132920751e-06, "loss": 0.5621, "step": 11035 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.517323129865215e-06, "loss": 0.6117, "step": 11036 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.515540305138245e-06, "loss": 0.6938, "step": 11037 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.513757658810763e-06, "loss": 0.5578, "step": 11038 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.511975190953688e-06, "loss": 0.7003, "step": 11039 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.510192901637927e-06, "loss": 0.6362, "step": 11040 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.508410790934381e-06, "loss": 0.6141, "step": 11041 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.506628858913956e-06, "loss": 0.6504, "step": 11042 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.504847105647527e-06, "loss": 0.5831, "step": 11043 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.503065531205982e-06, "loss": 0.6922, "step": 11044 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.501284135660194e-06, "loss": 0.5135, "step": 11045 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.499502919081026e-06, "loss": 0.7189, "step": 11046 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.497721881539345e-06, "loss": 0.5143, "step": 11047 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.495941023106001e-06, "loss": 0.6107, "step": 11048 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.494160343851838e-06, "loss": 0.6072, "step": 11049 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4923798438477e-06, "loss": 0.6324, "step": 11050 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.490599523164406e-06, "loss": 0.6411, "step": 11051 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.488819381872792e-06, "loss": 0.6349, "step": 11052 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.487039420043672e-06, "loss": 0.5892, "step": 11053 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.485259637747853e-06, "loss": 0.5377, "step": 11054 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.483480035056141e-06, "loss": 0.4945, "step": 11055 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.48170061203933e-06, "loss": 0.3865, "step": 11056 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.479921368768208e-06, "loss": 0.402, "step": 11057 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4781423053135595e-06, "loss": 0.4275, "step": 11058 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4763634217461555e-06, "loss": 0.4899, "step": 11059 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4745847181367635e-06, "loss": 0.4489, "step": 11060 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.472806194556144e-06, "loss": 0.437, "step": 11061 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4710278510750456e-06, "loss": 0.4502, "step": 11062 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.469249687764222e-06, "loss": 0.4995, "step": 11063 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.467471704694408e-06, "loss": 0.4801, "step": 11064 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.465693901936337e-06, "loss": 0.5126, "step": 11065 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.463916279560728e-06, "loss": 0.4363, "step": 11066 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.462138837638295e-06, "loss": 0.4887, "step": 11067 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.460361576239757e-06, "loss": 0.485, "step": 11068 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.458584495435813e-06, "loss": 0.4895, "step": 11069 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.456807595297156e-06, "loss": 0.4993, "step": 11070 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.455030875894477e-06, "loss": 0.4469, "step": 11071 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.453254337298454e-06, "loss": 0.4362, "step": 11072 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4514779795797645e-06, "loss": 0.3945, "step": 11073 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.449701802809073e-06, "loss": 0.4034, "step": 11074 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4479258070570384e-06, "loss": 0.4336, "step": 11075 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.446149992394314e-06, "loss": 0.4082, "step": 11076 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.444374358891542e-06, "loss": 0.3927, "step": 11077 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.442598906619361e-06, "loss": 0.4685, "step": 11078 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4408236356484045e-06, "loss": 0.3962, "step": 11079 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.4390485460492995e-06, "loss": 0.4472, "step": 11080 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.437273637892652e-06, "loss": 0.447, "step": 11081 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 5.435498911249075e-06, "loss": 0.4277, "step": 11082 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.433724366189168e-06, "loss": 0.4649, "step": 11083 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.43195000278353e-06, "loss": 0.4223, "step": 11084 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.430175821102749e-06, "loss": 0.4821, "step": 11085 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.428401821217401e-06, "loss": 0.3742, "step": 11086 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.42662800319806e-06, "loss": 0.4467, "step": 11087 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.424854367115293e-06, "loss": 0.4019, "step": 11088 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.423080913039656e-06, "loss": 0.3828, "step": 11089 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.421307641041701e-06, "loss": 0.4227, "step": 11090 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.4195345511919716e-06, "loss": 0.4514, "step": 11091 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.417761643561005e-06, "loss": 0.4079, "step": 11092 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.415988918219329e-06, "loss": 0.4094, "step": 11093 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.414216375237468e-06, "loss": 0.3565, "step": 11094 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.412444014685935e-06, "loss": 0.4526, "step": 11095 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.410671836635237e-06, "loss": 0.4095, "step": 11096 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.408899841155877e-06, "loss": 0.3302, "step": 11097 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.4071280283183445e-06, "loss": 0.4573, "step": 11098 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.405356398193129e-06, "loss": 0.4573, "step": 11099 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.403584950850702e-06, "loss": 0.5404, "step": 11100 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.401813686361544e-06, "loss": 0.4343, "step": 11101 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.400042604796114e-06, "loss": 0.4608, "step": 11102 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.398271706224875e-06, "loss": 0.373, "step": 11103 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.396500990718266e-06, "loss": 0.4598, "step": 11104 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.394730458346728e-06, "loss": 0.4327, "step": 11105 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.392960109180707e-06, "loss": 0.3881, "step": 11106 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.391189943290625e-06, "loss": 0.4376, "step": 11107 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3894199607469035e-06, "loss": 0.4774, "step": 11108 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.387650161619953e-06, "loss": 0.3827, "step": 11109 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.385880545980181e-06, "loss": 0.429, "step": 11110 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.384111113897985e-06, "loss": 0.4677, "step": 11111 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.382341865443756e-06, "loss": 0.4481, "step": 11112 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.38057280068788e-06, "loss": 0.4162, "step": 11113 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3788039197007304e-06, "loss": 0.3909, "step": 11114 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.377035222552677e-06, "loss": 0.4393, "step": 11115 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3752667093140776e-06, "loss": 0.4058, "step": 11116 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.373498380055295e-06, "loss": 0.4224, "step": 11117 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.371730234846677e-06, "loss": 0.3789, "step": 11118 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.369962273758555e-06, "loss": 0.4518, "step": 11119 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3681944968612665e-06, "loss": 0.4646, "step": 11120 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3664269042251305e-06, "loss": 0.4253, "step": 11121 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.364659495920475e-06, "loss": 0.4179, "step": 11122 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.362892272017604e-06, "loss": 0.5279, "step": 11123 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.361125232586824e-06, "loss": 0.4338, "step": 11124 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.359358377698431e-06, "loss": 0.376, "step": 11125 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.357591707422705e-06, "loss": 0.4143, "step": 11126 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.355825221829938e-06, "loss": 0.4386, "step": 11127 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3540589209903985e-06, "loss": 0.4142, "step": 11128 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.352292804974354e-06, "loss": 0.4348, "step": 11129 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3505268738520646e-06, "loss": 0.3782, "step": 11130 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.34876112769378e-06, "loss": 0.4348, "step": 11131 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.346995566569747e-06, "loss": 0.514, "step": 11132 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.345230190550201e-06, "loss": 0.3844, "step": 11133 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.343464999705372e-06, "loss": 0.3922, "step": 11134 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.3416999941054825e-06, "loss": 0.3941, "step": 11135 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.339935173820747e-06, "loss": 0.4098, "step": 11136 }, { "epoch": 2.01, "grad_norm": 0.0, "learning_rate": 5.338170538921373e-06, "loss": 0.4817, "step": 11137 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.336406089477558e-06, "loss": 0.3944, "step": 11138 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.334641825559503e-06, "loss": 0.4533, "step": 11139 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.332877747237386e-06, "loss": 0.3152, "step": 11140 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.331113854581393e-06, "loss": 0.5116, "step": 11141 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.329350147661685e-06, "loss": 0.433, "step": 11142 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.327586626548424e-06, "loss": 0.4964, "step": 11143 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.3258232913117765e-06, "loss": 0.4443, "step": 11144 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.324060142021885e-06, "loss": 0.364, "step": 11145 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.322297178748892e-06, "loss": 0.4273, "step": 11146 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.320534401562931e-06, "loss": 0.4199, "step": 11147 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.318771810534128e-06, "loss": 0.5054, "step": 11148 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.317009405732601e-06, "loss": 0.4601, "step": 11149 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.315247187228463e-06, "loss": 0.4445, "step": 11150 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.3134851550918155e-06, "loss": 0.4227, "step": 11151 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.311723309392759e-06, "loss": 0.4888, "step": 11152 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.30996165020138e-06, "loss": 0.4704, "step": 11153 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.3082001775877565e-06, "loss": 0.5138, "step": 11154 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.306438891621972e-06, "loss": 0.3775, "step": 11155 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.304677792374093e-06, "loss": 0.4447, "step": 11156 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.302916879914171e-06, "loss": 0.458, "step": 11157 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.301156154312261e-06, "loss": 0.3983, "step": 11158 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.2993956156384065e-06, "loss": 0.4353, "step": 11159 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.297635263962651e-06, "loss": 0.4114, "step": 11160 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.295875099355019e-06, "loss": 0.4208, "step": 11161 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.294115121885535e-06, "loss": 0.4823, "step": 11162 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.292355331624215e-06, "loss": 0.4545, "step": 11163 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.2905957286410595e-06, "loss": 0.4003, "step": 11164 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.288836313006076e-06, "loss": 0.4351, "step": 11165 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.2870770847892535e-06, "loss": 0.4065, "step": 11166 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.285318044060581e-06, "loss": 0.4584, "step": 11167 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.283559190890031e-06, "loss": 0.3863, "step": 11168 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.2818005253475775e-06, "loss": 0.3543, "step": 11169 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.280042047503177e-06, "loss": 0.4138, "step": 11170 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.278283757426799e-06, "loss": 0.3949, "step": 11171 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.276525655188376e-06, "loss": 0.4263, "step": 11172 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.274767740857856e-06, "loss": 0.3815, "step": 11173 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.273010014505169e-06, "loss": 0.4533, "step": 11174 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.271252476200237e-06, "loss": 0.4482, "step": 11175 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.269495126012987e-06, "loss": 0.418, "step": 11176 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.267737964013324e-06, "loss": 0.4417, "step": 11177 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.265980990271156e-06, "loss": 0.4263, "step": 11178 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.264224204856372e-06, "loss": 0.4067, "step": 11179 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.262467607838859e-06, "loss": 0.4393, "step": 11180 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.260711199288498e-06, "loss": 0.3646, "step": 11181 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.258954979275168e-06, "loss": 0.4577, "step": 11182 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.257198947868731e-06, "loss": 0.4044, "step": 11183 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.255443105139044e-06, "loss": 0.4624, "step": 11184 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.253687451155959e-06, "loss": 0.4546, "step": 11185 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.251931985989317e-06, "loss": 0.4362, "step": 11186 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.250176709708955e-06, "loss": 0.4212, "step": 11187 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.248421622384701e-06, "loss": 0.4194, "step": 11188 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.246666724086374e-06, "loss": 0.479, "step": 11189 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.2449120148837875e-06, "loss": 0.4189, "step": 11190 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.243157494846748e-06, "loss": 0.4711, "step": 11191 }, { "epoch": 2.02, "grad_norm": 0.0, "learning_rate": 5.241403164045047e-06, "loss": 0.4605, "step": 11192 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.239649022548485e-06, "loss": 0.4115, "step": 11193 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.237895070426844e-06, "loss": 0.4446, "step": 11194 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.23614130774989e-06, "loss": 0.4207, "step": 11195 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.234387734587396e-06, "loss": 0.4432, "step": 11196 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.232634351009119e-06, "loss": 0.401, "step": 11197 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.230881157084817e-06, "loss": 0.4114, "step": 11198 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.229128152884233e-06, "loss": 0.456, "step": 11199 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.227375338477105e-06, "loss": 0.504, "step": 11200 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.225622713933165e-06, "loss": 0.3931, "step": 11201 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.223870279322125e-06, "loss": 0.4285, "step": 11202 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.22211803471371e-06, "loss": 0.4586, "step": 11203 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.220365980177626e-06, "loss": 0.3272, "step": 11204 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.2186141157835715e-06, "loss": 0.4429, "step": 11205 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.216862441601236e-06, "loss": 0.3576, "step": 11206 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.215110957700309e-06, "loss": 0.4519, "step": 11207 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.2133596641504595e-06, "loss": 0.359, "step": 11208 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.211608561021372e-06, "loss": 0.406, "step": 11209 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.209857648382693e-06, "loss": 0.3997, "step": 11210 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.208106926304084e-06, "loss": 0.3846, "step": 11211 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.2063563948551905e-06, "loss": 0.4713, "step": 11212 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.204606054105647e-06, "loss": 0.4841, "step": 11213 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.202855904125095e-06, "loss": 0.4625, "step": 11214 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.201105944983152e-06, "loss": 0.4315, "step": 11215 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1993561767494395e-06, "loss": 0.4249, "step": 11216 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.197606599493558e-06, "loss": 0.4373, "step": 11217 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.195857213285109e-06, "loss": 0.4418, "step": 11218 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.194108018193695e-06, "loss": 0.4018, "step": 11219 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.192359014288897e-06, "loss": 0.4613, "step": 11220 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.190610201640292e-06, "loss": 0.3942, "step": 11221 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.188861580317452e-06, "loss": 0.4411, "step": 11222 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.187113150389942e-06, "loss": 0.4125, "step": 11223 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1853649119273145e-06, "loss": 0.4578, "step": 11224 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.183616864999119e-06, "loss": 0.4021, "step": 11225 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1818690096748955e-06, "loss": 0.4126, "step": 11226 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.180121346024176e-06, "loss": 0.5523, "step": 11227 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.178373874116488e-06, "loss": 0.494, "step": 11228 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1766265940213455e-06, "loss": 0.4349, "step": 11229 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1748795058082565e-06, "loss": 0.4178, "step": 11230 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.173132609546736e-06, "loss": 0.4673, "step": 11231 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1713859053062636e-06, "loss": 0.4594, "step": 11232 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1696393931563315e-06, "loss": 0.4767, "step": 11233 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1678930731664215e-06, "loss": 0.443, "step": 11234 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.166146945405996e-06, "loss": 0.3756, "step": 11235 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.164401009944532e-06, "loss": 0.404, "step": 11236 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.16265526685148e-06, "loss": 0.4375, "step": 11237 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.160909716196285e-06, "loss": 0.4562, "step": 11238 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.159164358048399e-06, "loss": 0.4756, "step": 11239 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.157419192477238e-06, "loss": 0.4573, "step": 11240 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.15567421955224e-06, "loss": 0.3811, "step": 11241 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.153929439342823e-06, "loss": 0.4178, "step": 11242 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.1521848519183936e-06, "loss": 0.4044, "step": 11243 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.150440457348355e-06, "loss": 0.3886, "step": 11244 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.148696255702104e-06, "loss": 0.3993, "step": 11245 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.146952247049022e-06, "loss": 0.3991, "step": 11246 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 5.145208431458501e-06, "loss": 0.4427, "step": 11247 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.143464808999901e-06, "loss": 0.4202, "step": 11248 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.141721379742591e-06, "loss": 0.4276, "step": 11249 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.1399781437559286e-06, "loss": 0.4142, "step": 11250 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.138235101109255e-06, "loss": 0.4347, "step": 11251 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.136492251871923e-06, "loss": 0.4302, "step": 11252 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.13474959611326e-06, "loss": 0.4947, "step": 11253 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.133007133902598e-06, "loss": 0.4785, "step": 11254 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.131264865309244e-06, "loss": 0.4383, "step": 11255 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.129522790402512e-06, "loss": 0.4707, "step": 11256 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.1277809092517115e-06, "loss": 0.4075, "step": 11257 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.126039221926131e-06, "loss": 0.4725, "step": 11258 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.1242977284950616e-06, "loss": 0.5279, "step": 11259 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.122556429027781e-06, "loss": 0.3904, "step": 11260 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.12081532359356e-06, "loss": 0.4168, "step": 11261 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.119074412261664e-06, "loss": 0.5064, "step": 11262 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.11733369510135e-06, "loss": 0.4787, "step": 11263 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.115593172181866e-06, "loss": 0.3978, "step": 11264 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.113852843572453e-06, "loss": 0.4487, "step": 11265 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.112112709342344e-06, "loss": 0.3892, "step": 11266 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.1103727695607655e-06, "loss": 0.44, "step": 11267 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.108633024296929e-06, "loss": 0.4253, "step": 11268 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.10689347362006e-06, "loss": 0.4251, "step": 11269 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.105154117599347e-06, "loss": 0.3805, "step": 11270 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.103414956303987e-06, "loss": 0.4378, "step": 11271 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.10167598980317e-06, "loss": 0.4764, "step": 11272 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.099937218166069e-06, "loss": 0.454, "step": 11273 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.098198641461862e-06, "loss": 0.4614, "step": 11274 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0964602597597125e-06, "loss": 0.4168, "step": 11275 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.094722073128773e-06, "loss": 0.4451, "step": 11276 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.092984081638197e-06, "loss": 0.4038, "step": 11277 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.091246285357112e-06, "loss": 0.4416, "step": 11278 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.089508684354664e-06, "loss": 0.4448, "step": 11279 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.087771278699971e-06, "loss": 0.4688, "step": 11280 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0860340684621535e-06, "loss": 0.4188, "step": 11281 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0842970537103176e-06, "loss": 0.4772, "step": 11282 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.082560234513566e-06, "loss": 0.4007, "step": 11283 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.080823610940992e-06, "loss": 0.4189, "step": 11284 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.079087183061684e-06, "loss": 0.5308, "step": 11285 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.077350950944715e-06, "loss": 0.4117, "step": 11286 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.075614914659159e-06, "loss": 0.3878, "step": 11287 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0738790742740795e-06, "loss": 0.4238, "step": 11288 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.072143429858523e-06, "loss": 0.5206, "step": 11289 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.070407981481547e-06, "loss": 0.5202, "step": 11290 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.068672729212188e-06, "loss": 0.4217, "step": 11291 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0669376731194786e-06, "loss": 0.4432, "step": 11292 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.065202813272435e-06, "loss": 0.3786, "step": 11293 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.063468149740074e-06, "loss": 0.4864, "step": 11294 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.06173368259141e-06, "loss": 0.4489, "step": 11295 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0599994118954395e-06, "loss": 0.4452, "step": 11296 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0582653377211565e-06, "loss": 0.4311, "step": 11297 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.056531460137544e-06, "loss": 0.4576, "step": 11298 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0547977792135785e-06, "loss": 0.3526, "step": 11299 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.053064295018227e-06, "loss": 0.356, "step": 11300 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.051331007620455e-06, "loss": 0.4249, "step": 11301 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.049597917089211e-06, "loss": 0.373, "step": 11302 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 5.0478650234934435e-06, "loss": 0.4906, "step": 11303 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.046132326902089e-06, "loss": 0.4427, "step": 11304 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.0443998273840725e-06, "loss": 0.4264, "step": 11305 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.0426675250083245e-06, "loss": 0.4258, "step": 11306 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.040935419843758e-06, "loss": 0.4004, "step": 11307 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.039203511959272e-06, "loss": 0.408, "step": 11308 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.03747180142377e-06, "loss": 0.4175, "step": 11309 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.035740288306136e-06, "loss": 0.3518, "step": 11310 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.034008972675262e-06, "loss": 0.4882, "step": 11311 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.032277854600017e-06, "loss": 0.4391, "step": 11312 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.030546934149271e-06, "loss": 0.5099, "step": 11313 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.02881621139188e-06, "loss": 0.426, "step": 11314 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.027085686396696e-06, "loss": 0.4203, "step": 11315 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.025355359232564e-06, "loss": 0.4758, "step": 11316 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.023625229968317e-06, "loss": 0.408, "step": 11317 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.021895298672781e-06, "loss": 0.4472, "step": 11318 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.02016556541478e-06, "loss": 0.4155, "step": 11319 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.018436030263125e-06, "loss": 0.5033, "step": 11320 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.016706693286617e-06, "loss": 0.4438, "step": 11321 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.014977554554054e-06, "loss": 0.4176, "step": 11322 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.013248614134223e-06, "loss": 0.4507, "step": 11323 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.0115198720959065e-06, "loss": 0.4232, "step": 11324 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.009791328507874e-06, "loss": 0.3482, "step": 11325 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.0080629834388914e-06, "loss": 0.417, "step": 11326 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.006334836957712e-06, "loss": 0.4541, "step": 11327 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.00460688913309e-06, "loss": 0.4198, "step": 11328 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.002879140033764e-06, "loss": 0.3962, "step": 11329 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 5.0011515897284705e-06, "loss": 0.474, "step": 11330 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.999424238285926e-06, "loss": 0.454, "step": 11331 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.997697085774848e-06, "loss": 0.419, "step": 11332 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.995970132263953e-06, "loss": 0.4618, "step": 11333 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.994243377821939e-06, "loss": 0.4754, "step": 11334 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.992516822517498e-06, "loss": 0.422, "step": 11335 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.9907904664193165e-06, "loss": 0.4388, "step": 11336 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.989064309596073e-06, "loss": 0.4188, "step": 11337 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.987338352116433e-06, "loss": 0.3771, "step": 11338 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.985612594049062e-06, "loss": 0.3549, "step": 11339 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.983887035462612e-06, "loss": 0.3726, "step": 11340 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.982161676425727e-06, "loss": 0.3817, "step": 11341 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.980436517007049e-06, "loss": 0.4076, "step": 11342 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.978711557275201e-06, "loss": 0.4069, "step": 11343 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.9769867972988114e-06, "loss": 0.4119, "step": 11344 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.9752622371464975e-06, "loss": 0.4246, "step": 11345 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.973537876886856e-06, "loss": 0.4193, "step": 11346 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.971813716588487e-06, "loss": 0.3845, "step": 11347 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.970089756319979e-06, "loss": 0.5267, "step": 11348 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.968365996149922e-06, "loss": 0.402, "step": 11349 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.966642436146884e-06, "loss": 0.4267, "step": 11350 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.964919076379433e-06, "loss": 0.4209, "step": 11351 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.963195916916127e-06, "loss": 0.3927, "step": 11352 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.961472957825515e-06, "loss": 0.4405, "step": 11353 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.959750199176141e-06, "loss": 0.4798, "step": 11354 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.958027641036539e-06, "loss": 0.4329, "step": 11355 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.956305283475233e-06, "loss": 0.4356, "step": 11356 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.954583126560746e-06, "loss": 0.4591, "step": 11357 }, { "epoch": 2.05, "grad_norm": 0.0, "learning_rate": 4.952861170361584e-06, "loss": 0.4804, "step": 11358 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.95113941494625e-06, "loss": 0.4069, "step": 11359 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.94941786038324e-06, "loss": 0.4595, "step": 11360 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.94769650674104e-06, "loss": 0.4758, "step": 11361 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.945975354088127e-06, "loss": 0.4758, "step": 11362 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.944254402492973e-06, "loss": 0.4387, "step": 11363 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.94253365202404e-06, "loss": 0.4519, "step": 11364 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.940813102749779e-06, "loss": 0.4082, "step": 11365 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.939092754738643e-06, "loss": 0.4095, "step": 11366 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.937372608059067e-06, "loss": 0.4765, "step": 11367 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.935652662779484e-06, "loss": 0.4506, "step": 11368 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.933932918968312e-06, "loss": 0.4117, "step": 11369 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.932213376693962e-06, "loss": 0.4643, "step": 11370 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.93049403602485e-06, "loss": 0.3868, "step": 11371 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.928774897029369e-06, "loss": 0.3999, "step": 11372 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.927055959775911e-06, "loss": 0.428, "step": 11373 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.9253372243328564e-06, "loss": 0.4026, "step": 11374 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.923618690768581e-06, "loss": 0.4145, "step": 11375 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.92190035915145e-06, "loss": 0.4446, "step": 11376 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.920182229549822e-06, "loss": 0.468, "step": 11377 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.918464302032047e-06, "loss": 0.4636, "step": 11378 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.916746576666468e-06, "loss": 0.4714, "step": 11379 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.915029053521419e-06, "loss": 0.377, "step": 11380 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.9133117326652205e-06, "loss": 0.4898, "step": 11381 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.911594614166199e-06, "loss": 0.3862, "step": 11382 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.909877698092663e-06, "loss": 0.401, "step": 11383 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.9081609845129104e-06, "loss": 0.3664, "step": 11384 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.906444473495236e-06, "loss": 0.406, "step": 11385 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.9047281651079215e-06, "loss": 0.4473, "step": 11386 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.903012059419254e-06, "loss": 0.353, "step": 11387 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.901296156497498e-06, "loss": 0.3797, "step": 11388 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.899580456410916e-06, "loss": 0.3462, "step": 11389 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.897864959227765e-06, "loss": 0.4, "step": 11390 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.896149665016278e-06, "loss": 0.3803, "step": 11391 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.894434573844705e-06, "loss": 0.4474, "step": 11392 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.892719685781271e-06, "loss": 0.3723, "step": 11393 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.891005000894196e-06, "loss": 0.4635, "step": 11394 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.889290519251695e-06, "loss": 0.403, "step": 11395 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.887576240921973e-06, "loss": 0.4025, "step": 11396 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.885862165973222e-06, "loss": 0.4245, "step": 11397 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.884148294473642e-06, "loss": 0.4902, "step": 11398 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.882434626491404e-06, "loss": 0.498, "step": 11399 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.880721162094682e-06, "loss": 0.4192, "step": 11400 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.879007901351643e-06, "loss": 0.4503, "step": 11401 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.877294844330438e-06, "loss": 0.4472, "step": 11402 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.875581991099224e-06, "loss": 0.4823, "step": 11403 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.873869341726137e-06, "loss": 0.478, "step": 11404 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.87215689627931e-06, "loss": 0.463, "step": 11405 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.870444654826869e-06, "loss": 0.4259, "step": 11406 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.86873261743692e-06, "loss": 0.3661, "step": 11407 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.867020784177581e-06, "loss": 0.3696, "step": 11408 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.865309155116948e-06, "loss": 0.386, "step": 11409 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.863597730323114e-06, "loss": 0.4353, "step": 11410 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.861886509864161e-06, "loss": 0.4426, "step": 11411 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.860175493808165e-06, "loss": 0.4119, "step": 11412 }, { "epoch": 2.06, "grad_norm": 0.0, "learning_rate": 4.858464682223193e-06, "loss": 0.3866, "step": 11413 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.856754075177304e-06, "loss": 0.375, "step": 11414 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.855043672738548e-06, "loss": 0.4945, "step": 11415 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.853333474974969e-06, "loss": 0.4194, "step": 11416 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8516234819546e-06, "loss": 0.461, "step": 11417 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.849913693745471e-06, "loss": 0.4304, "step": 11418 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.848204110415592e-06, "loss": 0.4304, "step": 11419 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.846494732032983e-06, "loss": 0.4158, "step": 11420 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.844785558665645e-06, "loss": 0.4247, "step": 11421 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.843076590381567e-06, "loss": 0.4017, "step": 11422 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8413678272487365e-06, "loss": 0.4736, "step": 11423 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.839659269335126e-06, "loss": 0.4507, "step": 11424 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8379509167087145e-06, "loss": 0.4388, "step": 11425 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.836242769437458e-06, "loss": 0.4118, "step": 11426 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.83453482758931e-06, "loss": 0.4712, "step": 11427 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.832827091232221e-06, "loss": 0.3896, "step": 11428 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.831119560434112e-06, "loss": 0.3732, "step": 11429 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.829412235262927e-06, "loss": 0.3999, "step": 11430 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.827705115786581e-06, "loss": 0.4257, "step": 11431 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8259982020729855e-06, "loss": 0.4245, "step": 11432 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.824291494190047e-06, "loss": 0.4272, "step": 11433 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.822584992205657e-06, "loss": 0.3973, "step": 11434 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.820878696187702e-06, "loss": 0.4124, "step": 11435 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8191726062040734e-06, "loss": 0.4687, "step": 11436 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.817466722322629e-06, "loss": 0.3928, "step": 11437 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.815761044611237e-06, "loss": 0.4114, "step": 11438 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.814055573137751e-06, "loss": 0.4499, "step": 11439 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.812350307970016e-06, "loss": 0.4269, "step": 11440 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.810645249175876e-06, "loss": 0.4051, "step": 11441 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.808940396823157e-06, "loss": 0.43, "step": 11442 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8072357509796866e-06, "loss": 0.4391, "step": 11443 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.805531311713269e-06, "loss": 0.3727, "step": 11444 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.803827079091712e-06, "loss": 0.4738, "step": 11445 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8021230531828175e-06, "loss": 0.3978, "step": 11446 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.8004192340543735e-06, "loss": 0.4347, "step": 11447 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.7987156217741594e-06, "loss": 0.404, "step": 11448 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.797012216409947e-06, "loss": 0.3632, "step": 11449 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.795309018029504e-06, "loss": 0.401, "step": 11450 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.793606026700583e-06, "loss": 0.4764, "step": 11451 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.791903242490934e-06, "loss": 0.4315, "step": 11452 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.790200665468295e-06, "loss": 0.4048, "step": 11453 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.7884982957004e-06, "loss": 0.4231, "step": 11454 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.7867961332549695e-06, "loss": 0.4791, "step": 11455 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.785094178199719e-06, "loss": 0.3975, "step": 11456 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.783392430602354e-06, "loss": 0.4474, "step": 11457 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.781690890530576e-06, "loss": 0.3914, "step": 11458 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.779989558052079e-06, "loss": 0.4394, "step": 11459 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.778288433234536e-06, "loss": 0.4458, "step": 11460 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.776587516145625e-06, "loss": 0.335, "step": 11461 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.774886806853005e-06, "loss": 0.4457, "step": 11462 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.773186305424343e-06, "loss": 0.412, "step": 11463 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.771486011927285e-06, "loss": 0.4616, "step": 11464 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.76978592642947e-06, "loss": 0.4045, "step": 11465 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.7680860489985345e-06, "loss": 0.4338, "step": 11466 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.766386379702092e-06, "loss": 0.4537, "step": 11467 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.764686918607767e-06, "loss": 0.429, "step": 11468 }, { "epoch": 2.07, "grad_norm": 0.0, "learning_rate": 4.762987665783166e-06, "loss": 0.4456, "step": 11469 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.761288621295888e-06, "loss": 0.5685, "step": 11470 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.759589785213522e-06, "loss": 0.4498, "step": 11471 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.757891157603651e-06, "loss": 0.4598, "step": 11472 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.756192738533847e-06, "loss": 0.4246, "step": 11473 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.754494528071687e-06, "loss": 0.4315, "step": 11474 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7527965262847165e-06, "loss": 0.4522, "step": 11475 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.751098733240489e-06, "loss": 0.4054, "step": 11476 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.749401149006546e-06, "loss": 0.4103, "step": 11477 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.747703773650416e-06, "loss": 0.4727, "step": 11478 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7460066072396314e-06, "loss": 0.491, "step": 11479 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.744309649841705e-06, "loss": 0.385, "step": 11480 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.742612901524148e-06, "loss": 0.4299, "step": 11481 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.740916362354452e-06, "loss": 0.4071, "step": 11482 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.739220032400108e-06, "loss": 0.4041, "step": 11483 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7375239117286076e-06, "loss": 0.4506, "step": 11484 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7358280004074215e-06, "loss": 0.4484, "step": 11485 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.734132298504016e-06, "loss": 0.3982, "step": 11486 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.732436806085848e-06, "loss": 0.487, "step": 11487 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.730741523220368e-06, "loss": 0.3947, "step": 11488 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7290464499750164e-06, "loss": 0.4575, "step": 11489 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7273515864172274e-06, "loss": 0.4491, "step": 11490 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.725656932614423e-06, "loss": 0.3938, "step": 11491 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.723962488634023e-06, "loss": 0.4631, "step": 11492 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7222682545434325e-06, "loss": 0.4033, "step": 11493 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.7205742304100496e-06, "loss": 0.4195, "step": 11494 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.718880416301272e-06, "loss": 0.388, "step": 11495 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.717186812284476e-06, "loss": 0.4481, "step": 11496 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.715493418427045e-06, "loss": 0.3523, "step": 11497 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.713800234796334e-06, "loss": 0.4427, "step": 11498 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.712107261459702e-06, "loss": 0.4797, "step": 11499 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.710414498484504e-06, "loss": 0.4687, "step": 11500 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.70872194593808e-06, "loss": 0.4164, "step": 11501 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.707029603887762e-06, "loss": 0.4047, "step": 11502 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.705337472400873e-06, "loss": 0.432, "step": 11503 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.70364555154473e-06, "loss": 0.4152, "step": 11504 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.70195384138664e-06, "loss": 0.3952, "step": 11505 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.700262341993902e-06, "loss": 0.4719, "step": 11506 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.698571053433807e-06, "loss": 0.4177, "step": 11507 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.696879975773637e-06, "loss": 0.3895, "step": 11508 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.695189109080667e-06, "loss": 0.4209, "step": 11509 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.6934984534221615e-06, "loss": 0.4723, "step": 11510 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.691808008865374e-06, "loss": 0.4139, "step": 11511 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.690117775477565e-06, "loss": 0.5116, "step": 11512 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.688427753325964e-06, "loss": 0.4115, "step": 11513 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.6867379424778055e-06, "loss": 0.3998, "step": 11514 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.685048343000315e-06, "loss": 0.4181, "step": 11515 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.683358954960702e-06, "loss": 0.419, "step": 11516 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.681669778426181e-06, "loss": 0.4122, "step": 11517 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.679980813463947e-06, "loss": 0.3952, "step": 11518 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.678292060141194e-06, "loss": 0.4504, "step": 11519 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.676603518525096e-06, "loss": 0.3929, "step": 11520 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.674915188682826e-06, "loss": 0.4784, "step": 11521 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.6732270706815555e-06, "loss": 0.4312, "step": 11522 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.671539164588437e-06, "loss": 0.2916, "step": 11523 }, { "epoch": 2.08, "grad_norm": 0.0, "learning_rate": 4.669851470470619e-06, "loss": 0.408, "step": 11524 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.66816398839524e-06, "loss": 0.4052, "step": 11525 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.666476718429433e-06, "loss": 0.4236, "step": 11526 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.664789660640318e-06, "loss": 0.4755, "step": 11527 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.663102815095008e-06, "loss": 0.3576, "step": 11528 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.661416181860614e-06, "loss": 0.3645, "step": 11529 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.659729761004228e-06, "loss": 0.4764, "step": 11530 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.658043552592941e-06, "loss": 0.418, "step": 11531 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.656357556693829e-06, "loss": 0.4368, "step": 11532 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.654671773373971e-06, "loss": 0.3849, "step": 11533 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.652986202700431e-06, "loss": 0.3933, "step": 11534 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.651300844740257e-06, "loss": 0.5016, "step": 11535 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.649615699560496e-06, "loss": 0.4518, "step": 11536 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.647930767228186e-06, "loss": 0.4533, "step": 11537 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.646246047810362e-06, "loss": 0.3924, "step": 11538 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.644561541374042e-06, "loss": 0.481, "step": 11539 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.642877247986237e-06, "loss": 0.394, "step": 11540 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.641193167713954e-06, "loss": 0.4236, "step": 11541 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.639509300624187e-06, "loss": 0.3907, "step": 11542 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.637825646783922e-06, "loss": 0.4147, "step": 11543 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.6361422062601395e-06, "loss": 0.4437, "step": 11544 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.634458979119808e-06, "loss": 0.4339, "step": 11545 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.6327759654298894e-06, "loss": 0.4705, "step": 11546 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.631093165257339e-06, "loss": 0.4272, "step": 11547 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.629410578669099e-06, "loss": 0.4627, "step": 11548 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.627728205732104e-06, "loss": 0.3828, "step": 11549 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.626046046513291e-06, "loss": 0.4427, "step": 11550 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.624364101079567e-06, "loss": 0.3766, "step": 11551 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.622682369497849e-06, "loss": 0.4349, "step": 11552 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.621000851835039e-06, "loss": 0.4417, "step": 11553 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.619319548158027e-06, "loss": 0.4306, "step": 11554 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.617638458533703e-06, "loss": 0.4145, "step": 11555 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.615957583028942e-06, "loss": 0.4745, "step": 11556 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.614276921710615e-06, "loss": 0.4812, "step": 11557 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.612596474645576e-06, "loss": 0.463, "step": 11558 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.610916241900674e-06, "loss": 0.4534, "step": 11559 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.609236223542759e-06, "loss": 0.4605, "step": 11560 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.607556419638662e-06, "loss": 0.4593, "step": 11561 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.60587683025521e-06, "loss": 0.4789, "step": 11562 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.604197455459216e-06, "loss": 0.3821, "step": 11563 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.602518295317492e-06, "loss": 0.4478, "step": 11564 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.600839349896836e-06, "loss": 0.4358, "step": 11565 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.59916061926404e-06, "loss": 0.365, "step": 11566 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.597482103485887e-06, "loss": 0.4479, "step": 11567 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.595803802629152e-06, "loss": 0.4381, "step": 11568 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.594125716760598e-06, "loss": 0.4096, "step": 11569 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.592447845946981e-06, "loss": 0.4714, "step": 11570 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.590770190255055e-06, "loss": 0.3791, "step": 11571 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.589092749751563e-06, "loss": 0.3988, "step": 11572 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.587415524503225e-06, "loss": 0.4246, "step": 11573 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.585738514576771e-06, "loss": 0.488, "step": 11574 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.58406172003891e-06, "loss": 0.3809, "step": 11575 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.5823851409563555e-06, "loss": 0.3673, "step": 11576 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.580708777395801e-06, "loss": 0.4128, "step": 11577 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.579032629423935e-06, "loss": 0.3612, "step": 11578 }, { "epoch": 2.09, "grad_norm": 0.0, "learning_rate": 4.577356697107438e-06, "loss": 0.4046, "step": 11579 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.57568098051298e-06, "loss": 0.4435, "step": 11580 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.574005479707227e-06, "loss": 0.363, "step": 11581 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.5723301947568285e-06, "loss": 0.4754, "step": 11582 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.570655125728435e-06, "loss": 0.4602, "step": 11583 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.568980272688681e-06, "loss": 0.4382, "step": 11584 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.567305635704196e-06, "loss": 0.4699, "step": 11585 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.565631214841595e-06, "loss": 0.3829, "step": 11586 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.563957010167501e-06, "loss": 0.4592, "step": 11587 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.562283021748507e-06, "loss": 0.3783, "step": 11588 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.56060924965121e-06, "loss": 0.4352, "step": 11589 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.558935693942195e-06, "loss": 0.4454, "step": 11590 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.557262354688035e-06, "loss": 0.4163, "step": 11591 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.555589231955305e-06, "loss": 0.3734, "step": 11592 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.5539163258105635e-06, "loss": 0.4733, "step": 11593 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.55224363632036e-06, "loss": 0.4445, "step": 11594 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.550571163551242e-06, "loss": 0.4328, "step": 11595 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.54889890756973e-06, "loss": 0.4321, "step": 11596 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.54722686844236e-06, "loss": 0.3938, "step": 11597 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.545555046235649e-06, "loss": 0.4258, "step": 11598 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.543883441016101e-06, "loss": 0.3939, "step": 11599 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.542212052850216e-06, "loss": 0.4337, "step": 11600 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.540540881804486e-06, "loss": 0.453, "step": 11601 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.538869927945392e-06, "loss": 0.3921, "step": 11602 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.5371991913394075e-06, "loss": 0.4806, "step": 11603 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.535528672052998e-06, "loss": 0.3796, "step": 11604 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.533858370152619e-06, "loss": 0.5009, "step": 11605 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.532188285704717e-06, "loss": 0.4075, "step": 11606 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.530518418775734e-06, "loss": 0.4782, "step": 11607 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.528848769432093e-06, "loss": 0.4837, "step": 11608 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.527179337740224e-06, "loss": 0.4476, "step": 11609 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.52551012376654e-06, "loss": 0.389, "step": 11610 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.523841127577439e-06, "loss": 0.4753, "step": 11611 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.522172349239319e-06, "loss": 0.4069, "step": 11612 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.520503788818562e-06, "loss": 0.4327, "step": 11613 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.518835446381555e-06, "loss": 0.4559, "step": 11614 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.517167321994666e-06, "loss": 0.4908, "step": 11615 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.515499415724252e-06, "loss": 0.4659, "step": 11616 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.513831727636666e-06, "loss": 0.4412, "step": 11617 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.512164257798252e-06, "loss": 0.3721, "step": 11618 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.5104970062753465e-06, "loss": 0.418, "step": 11619 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.5088299731342735e-06, "loss": 0.3612, "step": 11620 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.507163158441351e-06, "loss": 0.3774, "step": 11621 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.505496562262888e-06, "loss": 0.4041, "step": 11622 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.503830184665184e-06, "loss": 0.4528, "step": 11623 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.5021640257145275e-06, "loss": 0.4194, "step": 11624 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.500498085477211e-06, "loss": 0.4115, "step": 11625 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.498832364019499e-06, "loss": 0.4176, "step": 11626 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.497166861407659e-06, "loss": 0.4165, "step": 11627 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.495501577707948e-06, "loss": 0.4337, "step": 11628 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.49383651298661e-06, "loss": 0.4845, "step": 11629 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.492171667309893e-06, "loss": 0.4532, "step": 11630 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.4905070407440225e-06, "loss": 0.468, "step": 11631 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.4888426333552194e-06, "loss": 0.39, "step": 11632 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.487178445209701e-06, "loss": 0.4136, "step": 11633 }, { "epoch": 2.1, "grad_norm": 0.0, "learning_rate": 4.485514476373661e-06, "loss": 0.3733, "step": 11634 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.483850726913305e-06, "loss": 0.4617, "step": 11635 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.482187196894818e-06, "loss": 0.4899, "step": 11636 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.480523886384376e-06, "loss": 0.489, "step": 11637 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.47886079544815e-06, "loss": 0.4763, "step": 11638 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.477197924152299e-06, "loss": 0.4692, "step": 11639 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.475535272562976e-06, "loss": 0.3692, "step": 11640 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4738728407463235e-06, "loss": 0.4998, "step": 11641 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.472210628768477e-06, "loss": 0.4305, "step": 11642 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.470548636695561e-06, "loss": 0.3768, "step": 11643 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4688868645936925e-06, "loss": 0.4301, "step": 11644 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4672253125289785e-06, "loss": 0.3688, "step": 11645 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4655639805675174e-06, "loss": 0.4014, "step": 11646 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.463902868775406e-06, "loss": 0.4743, "step": 11647 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.462241977218726e-06, "loss": 0.3752, "step": 11648 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.460581305963543e-06, "loss": 0.4077, "step": 11649 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.458920855075925e-06, "loss": 0.323, "step": 11650 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.457260624621924e-06, "loss": 0.4351, "step": 11651 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.455600614667594e-06, "loss": 0.4366, "step": 11652 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.45394082527897e-06, "loss": 0.3751, "step": 11653 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.452281256522082e-06, "loss": 0.513, "step": 11654 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.450621908462949e-06, "loss": 0.4205, "step": 11655 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.448962781167582e-06, "loss": 0.4046, "step": 11656 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.447303874701986e-06, "loss": 0.4608, "step": 11657 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.445645189132154e-06, "loss": 0.4546, "step": 11658 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.443986724524071e-06, "loss": 0.4273, "step": 11659 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.442328480943714e-06, "loss": 0.4425, "step": 11660 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.440670458457051e-06, "loss": 0.4203, "step": 11661 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.439012657130037e-06, "loss": 0.48, "step": 11662 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4373550770286344e-06, "loss": 0.3883, "step": 11663 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.435697718218771e-06, "loss": 0.3794, "step": 11664 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.434040580766386e-06, "loss": 0.4835, "step": 11665 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.432383664737401e-06, "loss": 0.3444, "step": 11666 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.430726970197727e-06, "loss": 0.4815, "step": 11667 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.42907049721328e-06, "loss": 0.4075, "step": 11668 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.427414245849953e-06, "loss": 0.3943, "step": 11669 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.425758216173631e-06, "loss": 0.4225, "step": 11670 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.424102408250202e-06, "loss": 0.336, "step": 11671 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.422446822145523e-06, "loss": 0.4137, "step": 11672 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.420791457925468e-06, "loss": 0.4214, "step": 11673 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.419136315655887e-06, "loss": 0.4572, "step": 11674 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.417481395402624e-06, "loss": 0.4442, "step": 11675 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4158266972315135e-06, "loss": 0.421, "step": 11676 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.414172221208384e-06, "loss": 0.401, "step": 11677 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.412517967399053e-06, "loss": 0.3922, "step": 11678 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.410863935869328e-06, "loss": 0.426, "step": 11679 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4092101266850105e-06, "loss": 0.4503, "step": 11680 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.407556539911893e-06, "loss": 0.461, "step": 11681 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4059031756157555e-06, "loss": 0.4378, "step": 11682 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4042500338623725e-06, "loss": 0.4932, "step": 11683 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.4025971147175075e-06, "loss": 0.4551, "step": 11684 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.400944418246921e-06, "loss": 0.4384, "step": 11685 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.39929194451636e-06, "loss": 0.4119, "step": 11686 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.397639693591557e-06, "loss": 0.5749, "step": 11687 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.395987665538244e-06, "loss": 0.4034, "step": 11688 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.394335860422139e-06, "loss": 0.4312, "step": 11689 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 4.392684278308959e-06, "loss": 0.4298, "step": 11690 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.391032919264405e-06, "loss": 0.4663, "step": 11691 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.389381783354169e-06, "loss": 0.4464, "step": 11692 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.387730870643942e-06, "loss": 0.4549, "step": 11693 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.386080181199389e-06, "loss": 0.2832, "step": 11694 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.384429715086186e-06, "loss": 0.5174, "step": 11695 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.382779472369989e-06, "loss": 0.3915, "step": 11696 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.381129453116447e-06, "loss": 0.4452, "step": 11697 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3794796573912025e-06, "loss": 0.4352, "step": 11698 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3778300852598845e-06, "loss": 0.385, "step": 11699 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.376180736788113e-06, "loss": 0.4081, "step": 11700 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.374531612041514e-06, "loss": 0.4923, "step": 11701 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3728827110856805e-06, "loss": 0.3614, "step": 11702 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.371234033986212e-06, "loss": 0.4242, "step": 11703 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.369585580808697e-06, "loss": 0.3977, "step": 11704 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.36793735161871e-06, "loss": 0.4086, "step": 11705 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3662893464818255e-06, "loss": 0.3866, "step": 11706 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.364641565463604e-06, "loss": 0.4242, "step": 11707 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.362994008629593e-06, "loss": 0.3022, "step": 11708 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.361346676045341e-06, "loss": 0.3915, "step": 11709 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.359699567776371e-06, "loss": 0.4106, "step": 11710 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3580526838882185e-06, "loss": 0.3834, "step": 11711 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.356406024446395e-06, "loss": 0.4921, "step": 11712 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.354759589516407e-06, "loss": 0.3914, "step": 11713 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3531133791637546e-06, "loss": 0.508, "step": 11714 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.351467393453925e-06, "loss": 0.3954, "step": 11715 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3498216324523986e-06, "loss": 0.3979, "step": 11716 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.348176096224647e-06, "loss": 0.5157, "step": 11717 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.346530784836134e-06, "loss": 0.4842, "step": 11718 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.34488569835231e-06, "loss": 0.4128, "step": 11719 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.343240836838622e-06, "loss": 0.5031, "step": 11720 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.341596200360499e-06, "loss": 0.4114, "step": 11721 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3399517889833764e-06, "loss": 0.333, "step": 11722 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3383076027726676e-06, "loss": 0.4837, "step": 11723 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.336663641793786e-06, "loss": 0.4181, "step": 11724 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.335019906112123e-06, "loss": 0.4018, "step": 11725 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3333763957930675e-06, "loss": 0.4913, "step": 11726 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.331733110902011e-06, "loss": 0.5126, "step": 11727 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.330090051504321e-06, "loss": 0.4073, "step": 11728 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.328447217665363e-06, "loss": 0.4318, "step": 11729 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.32680460945049e-06, "loss": 0.4104, "step": 11730 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.325162226925049e-06, "loss": 0.4007, "step": 11731 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.323520070154375e-06, "loss": 0.3755, "step": 11732 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.3218781392037975e-06, "loss": 0.4069, "step": 11733 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.320236434138635e-06, "loss": 0.4021, "step": 11734 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.318594955024196e-06, "loss": 0.4473, "step": 11735 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.316953701925784e-06, "loss": 0.4861, "step": 11736 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.315312674908689e-06, "loss": 0.5463, "step": 11737 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.31367187403819e-06, "loss": 0.4019, "step": 11738 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.312031299379573e-06, "loss": 0.4793, "step": 11739 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.310390950998091e-06, "loss": 0.4261, "step": 11740 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.308750828959004e-06, "loss": 0.4099, "step": 11741 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.307110933327558e-06, "loss": 0.5426, "step": 11742 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.305471264168989e-06, "loss": 0.3769, "step": 11743 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.303831821548531e-06, "loss": 0.4273, "step": 11744 }, { "epoch": 2.12, "grad_norm": 0.0, "learning_rate": 4.302192605531401e-06, "loss": 0.4371, "step": 11745 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.3005536161828145e-06, "loss": 0.3784, "step": 11746 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.298914853567964e-06, "loss": 0.4125, "step": 11747 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.297276317752046e-06, "loss": 0.4085, "step": 11748 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.295638008800247e-06, "loss": 0.453, "step": 11749 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.293999926777741e-06, "loss": 0.3828, "step": 11750 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.292362071749692e-06, "loss": 0.5176, "step": 11751 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2907244437812576e-06, "loss": 0.415, "step": 11752 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.289087042937586e-06, "loss": 0.4342, "step": 11753 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2874498692838155e-06, "loss": 0.4658, "step": 11754 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.285812922885074e-06, "loss": 0.4253, "step": 11755 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2841762038064835e-06, "loss": 0.4156, "step": 11756 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.282539712113156e-06, "loss": 0.3977, "step": 11757 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.280903447870194e-06, "loss": 0.3549, "step": 11758 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2792674111426856e-06, "loss": 0.4236, "step": 11759 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.277631601995725e-06, "loss": 0.4236, "step": 11760 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.27599602049438e-06, "loss": 0.4235, "step": 11761 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.274360666703723e-06, "loss": 0.404, "step": 11762 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.272725540688804e-06, "loss": 0.4211, "step": 11763 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.271090642514672e-06, "loss": 0.325, "step": 11764 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.26945597224637e-06, "loss": 0.3915, "step": 11765 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.267821529948928e-06, "loss": 0.4551, "step": 11766 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.266187315687364e-06, "loss": 0.4195, "step": 11767 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.264553329526691e-06, "loss": 0.4365, "step": 11768 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.262919571531914e-06, "loss": 0.4087, "step": 11769 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.261286041768022e-06, "loss": 0.4174, "step": 11770 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.259652740300004e-06, "loss": 0.4297, "step": 11771 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.258019667192833e-06, "loss": 0.4458, "step": 11772 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.256386822511477e-06, "loss": 0.4318, "step": 11773 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.254754206320891e-06, "loss": 0.4636, "step": 11774 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2531218186860265e-06, "loss": 0.4404, "step": 11775 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.251489659671817e-06, "loss": 0.4313, "step": 11776 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.249857729343203e-06, "loss": 0.3375, "step": 11777 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.248226027765097e-06, "loss": 0.4129, "step": 11778 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2465945550024115e-06, "loss": 0.4309, "step": 11779 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.24496331112005e-06, "loss": 0.4488, "step": 11780 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.243332296182905e-06, "loss": 0.4403, "step": 11781 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.241701510255867e-06, "loss": 0.4416, "step": 11782 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2400709534038054e-06, "loss": 0.4769, "step": 11783 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.238440625691593e-06, "loss": 0.3583, "step": 11784 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.23681052718408e-06, "loss": 0.3947, "step": 11785 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.235180657946113e-06, "loss": 0.39, "step": 11786 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2335510180425376e-06, "loss": 0.4117, "step": 11787 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.231921607538183e-06, "loss": 0.4399, "step": 11788 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.230292426497866e-06, "loss": 0.4407, "step": 11789 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.228663474986402e-06, "loss": 0.406, "step": 11790 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2270347530685916e-06, "loss": 0.4571, "step": 11791 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.225406260809229e-06, "loss": 0.4516, "step": 11792 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.223777998273095e-06, "loss": 0.3811, "step": 11793 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2221499655249695e-06, "loss": 0.3283, "step": 11794 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.220522162629617e-06, "loss": 0.5067, "step": 11795 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.218894589651793e-06, "loss": 0.3817, "step": 11796 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.2172672466562416e-06, "loss": 0.4414, "step": 11797 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.21564013370771e-06, "loss": 0.4593, "step": 11798 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.214013250870926e-06, "loss": 0.3937, "step": 11799 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 4.212386598210604e-06, "loss": 0.3695, "step": 11800 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.210760175791456e-06, "loss": 0.3878, "step": 11801 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.209133983678184e-06, "loss": 0.4404, "step": 11802 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.207508021935486e-06, "loss": 0.4811, "step": 11803 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.205882290628041e-06, "loss": 0.4347, "step": 11804 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.204256789820525e-06, "loss": 0.502, "step": 11805 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.202631519577601e-06, "loss": 0.3874, "step": 11806 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.201006479963928e-06, "loss": 0.3895, "step": 11807 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1993816710441525e-06, "loss": 0.4713, "step": 11808 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1977570928829095e-06, "loss": 0.4069, "step": 11809 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.196132745544829e-06, "loss": 0.391, "step": 11810 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1945086290945315e-06, "loss": 0.376, "step": 11811 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.192884743596626e-06, "loss": 0.4249, "step": 11812 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.191261089115709e-06, "loss": 0.4378, "step": 11813 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.189637665716382e-06, "loss": 0.4101, "step": 11814 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.188014473463224e-06, "loss": 0.4305, "step": 11815 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.186391512420804e-06, "loss": 0.4894, "step": 11816 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.18476878265369e-06, "loss": 0.4564, "step": 11817 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.183146284226432e-06, "loss": 0.442, "step": 11818 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.181524017203582e-06, "loss": 0.4072, "step": 11819 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.179901981649674e-06, "loss": 0.4196, "step": 11820 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.178280177629237e-06, "loss": 0.4416, "step": 11821 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.17665860520679e-06, "loss": 0.4154, "step": 11822 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.175037264446833e-06, "loss": 0.3953, "step": 11823 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.173416155413876e-06, "loss": 0.4331, "step": 11824 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1717952781724056e-06, "loss": 0.4709, "step": 11825 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.170174632786903e-06, "loss": 0.3886, "step": 11826 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.168554219321842e-06, "loss": 0.4058, "step": 11827 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.166934037841683e-06, "loss": 0.4655, "step": 11828 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.16531408841088e-06, "loss": 0.4457, "step": 11829 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.163694371093878e-06, "loss": 0.5132, "step": 11830 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.162074885955113e-06, "loss": 0.404, "step": 11831 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.16045563305901e-06, "loss": 0.4168, "step": 11832 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.158836612469984e-06, "loss": 0.4813, "step": 11833 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.157217824252446e-06, "loss": 0.4304, "step": 11834 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.155599268470788e-06, "loss": 0.4527, "step": 11835 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.153980945189406e-06, "loss": 0.5146, "step": 11836 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.152362854472681e-06, "loss": 0.4387, "step": 11837 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.150744996384976e-06, "loss": 0.3957, "step": 11838 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.149127370990654e-06, "loss": 0.4007, "step": 11839 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.147509978354065e-06, "loss": 0.4804, "step": 11840 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1458928185395595e-06, "loss": 0.4411, "step": 11841 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.144275891611466e-06, "loss": 0.4848, "step": 11842 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1426591976341095e-06, "loss": 0.4206, "step": 11843 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.141042736671803e-06, "loss": 0.4087, "step": 11844 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.139426508788854e-06, "loss": 0.4003, "step": 11845 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1378105140495584e-06, "loss": 0.4285, "step": 11846 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.136194752518202e-06, "loss": 0.3695, "step": 11847 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.134579224259064e-06, "loss": 0.4302, "step": 11848 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.1329639293364135e-06, "loss": 0.4153, "step": 11849 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.131348867814509e-06, "loss": 0.4271, "step": 11850 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.129734039757595e-06, "loss": 0.4456, "step": 11851 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.128119445229924e-06, "loss": 0.3899, "step": 11852 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.126505084295719e-06, "loss": 0.4271, "step": 11853 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.124890957019202e-06, "loss": 0.4622, "step": 11854 }, { "epoch": 2.14, "grad_norm": 0.0, "learning_rate": 4.123277063464588e-06, "loss": 0.4246, "step": 11855 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.121663403696077e-06, "loss": 0.4, "step": 11856 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.120049977777868e-06, "loss": 0.3825, "step": 11857 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.118436785774145e-06, "loss": 0.5095, "step": 11858 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.1168238277490815e-06, "loss": 0.4036, "step": 11859 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.115211103766849e-06, "loss": 0.4251, "step": 11860 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.113598613891592e-06, "loss": 0.3251, "step": 11861 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.11198635818747e-06, "loss": 0.4274, "step": 11862 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.1103743367186165e-06, "loss": 0.4689, "step": 11863 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.108762549549163e-06, "loss": 0.4158, "step": 11864 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.1071509967432265e-06, "loss": 0.3943, "step": 11865 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.105539678364918e-06, "loss": 0.4398, "step": 11866 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.103928594478341e-06, "loss": 0.4263, "step": 11867 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.102317745147583e-06, "loss": 0.5076, "step": 11868 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.100707130436728e-06, "loss": 0.475, "step": 11869 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.09909675040985e-06, "loss": 0.4327, "step": 11870 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.097486605131013e-06, "loss": 0.4469, "step": 11871 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.095876694664269e-06, "loss": 0.333, "step": 11872 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.094267019073662e-06, "loss": 0.437, "step": 11873 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.092657578423233e-06, "loss": 0.4636, "step": 11874 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.0910483727770104e-06, "loss": 0.4114, "step": 11875 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.089439402199e-06, "loss": 0.452, "step": 11876 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.087830666753217e-06, "loss": 0.4635, "step": 11877 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.086222166503655e-06, "loss": 0.4561, "step": 11878 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.084613901514309e-06, "loss": 0.3502, "step": 11879 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.083005871849156e-06, "loss": 0.451, "step": 11880 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.081398077572166e-06, "loss": 0.4024, "step": 11881 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.079790518747298e-06, "loss": 0.4592, "step": 11882 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.078183195438508e-06, "loss": 0.4015, "step": 11883 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.076576107709732e-06, "loss": 0.4469, "step": 11884 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.074969255624908e-06, "loss": 0.3774, "step": 11885 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.0733626392479565e-06, "loss": 0.4391, "step": 11886 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.071756258642791e-06, "loss": 0.4099, "step": 11887 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.070150113873318e-06, "loss": 0.489, "step": 11888 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.068544205003428e-06, "loss": 0.4371, "step": 11889 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.066938532097019e-06, "loss": 0.3827, "step": 11890 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.0653330952179544e-06, "loss": 0.4276, "step": 11891 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.063727894430106e-06, "loss": 0.4391, "step": 11892 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.062122929797332e-06, "loss": 0.4395, "step": 11893 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.060518201383476e-06, "loss": 0.403, "step": 11894 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.058913709252384e-06, "loss": 0.4475, "step": 11895 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.057309453467884e-06, "loss": 0.4567, "step": 11896 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.055705434093793e-06, "loss": 0.4345, "step": 11897 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.054101651193926e-06, "loss": 0.4306, "step": 11898 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.052498104832074e-06, "loss": 0.4619, "step": 11899 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.050894795072041e-06, "loss": 0.4392, "step": 11900 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.049291721977604e-06, "loss": 0.4389, "step": 11901 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.0476888856125366e-06, "loss": 0.3661, "step": 11902 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.046086286040601e-06, "loss": 0.3713, "step": 11903 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.044483923325553e-06, "loss": 0.4265, "step": 11904 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.042881797531138e-06, "loss": 0.4149, "step": 11905 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.041279908721089e-06, "loss": 0.4689, "step": 11906 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.039678256959133e-06, "loss": 0.4297, "step": 11907 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.0380768423089874e-06, "loss": 0.4653, "step": 11908 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.036475664834358e-06, "loss": 0.4245, "step": 11909 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.03487472459894e-06, "loss": 0.3671, "step": 11910 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 4.033274021666426e-06, "loss": 0.4212, "step": 11911 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.031673556100494e-06, "loss": 0.3828, "step": 11912 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.030073327964816e-06, "loss": 0.3838, "step": 11913 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.028473337323045e-06, "loss": 0.3508, "step": 11914 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.02687358423883e-06, "loss": 0.4706, "step": 11915 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.025274068775819e-06, "loss": 0.4073, "step": 11916 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.0236747909976425e-06, "loss": 0.4579, "step": 11917 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.02207575096792e-06, "loss": 0.4517, "step": 11918 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.020476948750265e-06, "loss": 0.4748, "step": 11919 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.01887838440828e-06, "loss": 0.389, "step": 11920 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.017280058005561e-06, "loss": 0.4457, "step": 11921 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.015681969605688e-06, "loss": 0.4443, "step": 11922 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.014084119272239e-06, "loss": 0.4615, "step": 11923 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.012486507068778e-06, "loss": 0.4049, "step": 11924 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.010889133058861e-06, "loss": 0.4023, "step": 11925 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.009291997306034e-06, "loss": 0.368, "step": 11926 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.00769509987383e-06, "loss": 0.3871, "step": 11927 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.0060984408257884e-06, "loss": 0.4322, "step": 11928 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.004502020225416e-06, "loss": 0.4139, "step": 11929 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.002905838136223e-06, "loss": 0.4708, "step": 11930 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 4.0013098946217084e-06, "loss": 0.4039, "step": 11931 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.999714189745361e-06, "loss": 0.3951, "step": 11932 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.998118723570664e-06, "loss": 0.4585, "step": 11933 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.996523496161087e-06, "loss": 0.3846, "step": 11934 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.99492850758009e-06, "loss": 0.4366, "step": 11935 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.993333757891128e-06, "loss": 0.4982, "step": 11936 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.991739247157631e-06, "loss": 0.4052, "step": 11937 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.9901449754430435e-06, "loss": 0.508, "step": 11938 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.988550942810784e-06, "loss": 0.4353, "step": 11939 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.986957149324267e-06, "loss": 0.4075, "step": 11940 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.985363595046895e-06, "loss": 0.457, "step": 11941 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.983770280042062e-06, "loss": 0.3118, "step": 11942 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.982177204373154e-06, "loss": 0.3763, "step": 11943 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.980584368103547e-06, "loss": 0.4046, "step": 11944 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.978991771296605e-06, "loss": 0.4835, "step": 11945 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.9773994140156845e-06, "loss": 0.4359, "step": 11946 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.975807296324134e-06, "loss": 0.3593, "step": 11947 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.974215418285285e-06, "loss": 0.4027, "step": 11948 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.9726237799624715e-06, "loss": 0.3585, "step": 11949 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.971032381419011e-06, "loss": 0.4174, "step": 11950 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.969441222718213e-06, "loss": 0.4596, "step": 11951 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.967850303923372e-06, "loss": 0.3977, "step": 11952 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.966259625097775e-06, "loss": 0.3868, "step": 11953 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.96466918630471e-06, "loss": 0.4566, "step": 11954 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.963078987607445e-06, "loss": 0.4051, "step": 11955 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.961489029069239e-06, "loss": 0.5566, "step": 11956 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.959899310753344e-06, "loss": 0.4593, "step": 11957 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.958309832723001e-06, "loss": 0.3692, "step": 11958 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.956720595041444e-06, "loss": 0.4711, "step": 11959 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.955131597771893e-06, "loss": 0.3729, "step": 11960 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.953542840977563e-06, "loss": 0.4539, "step": 11961 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.951954324721657e-06, "loss": 0.4119, "step": 11962 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.95036604906737e-06, "loss": 0.4176, "step": 11963 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.948778014077884e-06, "loss": 0.3802, "step": 11964 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.9471902198163715e-06, "loss": 0.4703, "step": 11965 }, { "epoch": 2.16, "grad_norm": 0.0, "learning_rate": 3.945602666346008e-06, "loss": 0.3735, "step": 11966 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.94401535372994e-06, "loss": 0.3798, "step": 11967 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.9424282820313144e-06, "loss": 0.4613, "step": 11968 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.9408414513132685e-06, "loss": 0.4063, "step": 11969 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.939254861638927e-06, "loss": 0.4582, "step": 11970 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.937668513071413e-06, "loss": 0.4127, "step": 11971 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.9360824056738316e-06, "loss": 0.4239, "step": 11972 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.934496539509278e-06, "loss": 0.3734, "step": 11973 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.932910914640849e-06, "loss": 0.3964, "step": 11974 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.931325531131609e-06, "loss": 0.3927, "step": 11975 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.929740389044638e-06, "loss": 0.3938, "step": 11976 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.928155488442995e-06, "loss": 0.4677, "step": 11977 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.926570829389727e-06, "loss": 0.4331, "step": 11978 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.924986411947878e-06, "loss": 0.3874, "step": 11979 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.923402236180474e-06, "loss": 0.3771, "step": 11980 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.921818302150541e-06, "loss": 0.4547, "step": 11981 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.920234609921087e-06, "loss": 0.4606, "step": 11982 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.9186511595551156e-06, "loss": 0.3844, "step": 11983 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.91706795111562e-06, "loss": 0.3643, "step": 11984 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.9154849846655805e-06, "loss": 0.4674, "step": 11985 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.913902260267969e-06, "loss": 0.46, "step": 11986 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.912319777985756e-06, "loss": 0.4431, "step": 11987 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.910737537881889e-06, "loss": 0.4307, "step": 11988 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.909155540019319e-06, "loss": 0.391, "step": 11989 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.907573784460972e-06, "loss": 0.5165, "step": 11990 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.905992271269774e-06, "loss": 0.3521, "step": 11991 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.904411000508645e-06, "loss": 0.3819, "step": 11992 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.9028299722404905e-06, "loss": 0.4003, "step": 11993 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.901249186528203e-06, "loss": 0.4853, "step": 11994 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.899668643434672e-06, "loss": 0.3566, "step": 11995 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.898088343022772e-06, "loss": 0.4939, "step": 11996 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.896508285355371e-06, "loss": 0.4544, "step": 11997 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.894928470495327e-06, "loss": 0.421, "step": 11998 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.893348898505485e-06, "loss": 0.4816, "step": 11999 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.8917695694486875e-06, "loss": 0.4029, "step": 12000 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.89019048338776e-06, "loss": 0.4902, "step": 12001 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.888611640385518e-06, "loss": 0.4708, "step": 12002 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.887033040504777e-06, "loss": 0.4303, "step": 12003 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.8854546838083375e-06, "loss": 0.4269, "step": 12004 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.883876570358983e-06, "loss": 0.5181, "step": 12005 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.882298700219496e-06, "loss": 0.3997, "step": 12006 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.880721073452641e-06, "loss": 0.4342, "step": 12007 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.879143690121191e-06, "loss": 0.4869, "step": 12008 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.877566550287889e-06, "loss": 0.425, "step": 12009 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.8759896540154785e-06, "loss": 0.5123, "step": 12010 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.874413001366694e-06, "loss": 0.4853, "step": 12011 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.872836592404247e-06, "loss": 0.3577, "step": 12012 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.8712604271908605e-06, "loss": 0.3902, "step": 12013 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.869684505789233e-06, "loss": 0.4278, "step": 12014 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.868108828262058e-06, "loss": 0.4578, "step": 12015 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.866533394672017e-06, "loss": 0.431, "step": 12016 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.864958205081787e-06, "loss": 0.4534, "step": 12017 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.863383259554028e-06, "loss": 0.4067, "step": 12018 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.861808558151396e-06, "loss": 0.4058, "step": 12019 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.860234100936535e-06, "loss": 0.4237, "step": 12020 }, { "epoch": 2.17, "grad_norm": 0.0, "learning_rate": 3.8586598879720794e-06, "loss": 0.4045, "step": 12021 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.857085919320653e-06, "loss": 0.3975, "step": 12022 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.855512195044873e-06, "loss": 0.388, "step": 12023 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.853938715207341e-06, "loss": 0.465, "step": 12024 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.852365479870659e-06, "loss": 0.4597, "step": 12025 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8507924890974094e-06, "loss": 0.3774, "step": 12026 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.849219742950172e-06, "loss": 0.3829, "step": 12027 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.847647241491508e-06, "loss": 0.4051, "step": 12028 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8460749847839716e-06, "loss": 0.4587, "step": 12029 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.844502972890118e-06, "loss": 0.4792, "step": 12030 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.842931205872481e-06, "loss": 0.3752, "step": 12031 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.841359683793588e-06, "loss": 0.4387, "step": 12032 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8397884067159586e-06, "loss": 0.412, "step": 12033 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.838217374702098e-06, "loss": 0.3706, "step": 12034 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.836646587814506e-06, "loss": 0.4169, "step": 12035 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.835076046115672e-06, "loss": 0.4446, "step": 12036 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.833505749668074e-06, "loss": 0.4406, "step": 12037 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.831935698534181e-06, "loss": 0.4176, "step": 12038 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.830365892776452e-06, "loss": 0.4282, "step": 12039 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.828796332457333e-06, "loss": 0.4471, "step": 12040 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.827227017639272e-06, "loss": 0.4409, "step": 12041 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.825657948384698e-06, "loss": 0.4153, "step": 12042 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.824089124756024e-06, "loss": 0.3723, "step": 12043 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.822520546815666e-06, "loss": 0.476, "step": 12044 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.820952214626018e-06, "loss": 0.4302, "step": 12045 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.819384128249481e-06, "loss": 0.4538, "step": 12046 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.817816287748431e-06, "loss": 0.372, "step": 12047 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.81624869318524e-06, "loss": 0.4033, "step": 12048 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.814681344622272e-06, "loss": 0.3939, "step": 12049 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8131142421218704e-06, "loss": 0.4254, "step": 12050 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8115473857463848e-06, "loss": 0.4729, "step": 12051 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.809980775558146e-06, "loss": 0.3824, "step": 12052 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8084144116194777e-06, "loss": 0.4241, "step": 12053 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.80684829399269e-06, "loss": 0.424, "step": 12054 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8052824227400863e-06, "loss": 0.4608, "step": 12055 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8037167979239596e-06, "loss": 0.3977, "step": 12056 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.8021514196065944e-06, "loss": 0.3622, "step": 12057 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.800586287850262e-06, "loss": 0.4913, "step": 12058 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.799021402717228e-06, "loss": 0.4605, "step": 12059 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7974567642697456e-06, "loss": 0.4554, "step": 12060 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7958923725700593e-06, "loss": 0.4764, "step": 12061 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7943282276803985e-06, "loss": 0.3158, "step": 12062 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7927643296629947e-06, "loss": 0.3669, "step": 12063 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7912006785800638e-06, "loss": 0.4744, "step": 12064 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7896372744938016e-06, "loss": 0.4192, "step": 12065 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7880741174664083e-06, "loss": 0.4587, "step": 12066 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.786511207560064e-06, "loss": 0.5074, "step": 12067 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7849485448369527e-06, "loss": 0.4124, "step": 12068 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7833861293592343e-06, "loss": 0.3489, "step": 12069 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.781823961189066e-06, "loss": 0.4453, "step": 12070 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7802620403885937e-06, "loss": 0.4324, "step": 12071 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7787003670199507e-06, "loss": 0.4573, "step": 12072 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7771389411452652e-06, "loss": 0.3791, "step": 12073 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7755777628266545e-06, "loss": 0.4275, "step": 12074 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.7740168321262217e-06, "loss": 0.42, "step": 12075 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.772456149106065e-06, "loss": 0.424, "step": 12076 }, { "epoch": 2.18, "grad_norm": 0.0, "learning_rate": 3.770895713828272e-06, "loss": 0.4002, "step": 12077 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.769335526354915e-06, "loss": 0.4495, "step": 12078 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.767775586748067e-06, "loss": 0.3876, "step": 12079 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.766215895069787e-06, "loss": 0.4321, "step": 12080 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7646564513821137e-06, "loss": 0.403, "step": 12081 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.763097255747089e-06, "loss": 0.4662, "step": 12082 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7615383082267367e-06, "loss": 0.4725, "step": 12083 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.75997960888308e-06, "loss": 0.4643, "step": 12084 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.758421157778125e-06, "loss": 0.3054, "step": 12085 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7568629549738677e-06, "loss": 0.3564, "step": 12086 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.755305000532302e-06, "loss": 0.4547, "step": 12087 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.753747294515393e-06, "loss": 0.4965, "step": 12088 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7521898369851216e-06, "loss": 0.4433, "step": 12089 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.750632628003441e-06, "loss": 0.4609, "step": 12090 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7490756676323005e-06, "loss": 0.4392, "step": 12091 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7475189559336378e-06, "loss": 0.3712, "step": 12092 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7459624929693827e-06, "loss": 0.4266, "step": 12093 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7444062788014492e-06, "loss": 0.4197, "step": 12094 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7428503134917584e-06, "loss": 0.4524, "step": 12095 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7412945971021965e-06, "loss": 0.4025, "step": 12096 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7397391296946584e-06, "loss": 0.3638, "step": 12097 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7381839113310216e-06, "loss": 0.3692, "step": 12098 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7366289420731562e-06, "loss": 0.4108, "step": 12099 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7350742219829174e-06, "loss": 0.4056, "step": 12100 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.733519751122162e-06, "loss": 0.5215, "step": 12101 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.731965529552729e-06, "loss": 0.3987, "step": 12102 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7304115573364417e-06, "loss": 0.3926, "step": 12103 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7288578345351234e-06, "loss": 0.4746, "step": 12104 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7273043612105808e-06, "loss": 0.4137, "step": 12105 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.725751137424619e-06, "loss": 0.4268, "step": 12106 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7241981632390256e-06, "loss": 0.5363, "step": 12107 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.72264543871558e-06, "loss": 0.3865, "step": 12108 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.721092963916052e-06, "loss": 0.4255, "step": 12109 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.719540738902203e-06, "loss": 0.4142, "step": 12110 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7179887637357826e-06, "loss": 0.4898, "step": 12111 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.716437038478531e-06, "loss": 0.446, "step": 12112 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.714885563192178e-06, "loss": 0.3928, "step": 12113 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7133343379384435e-06, "loss": 0.4178, "step": 12114 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.71178336277904e-06, "loss": 0.3861, "step": 12115 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7102326377756626e-06, "loss": 0.4051, "step": 12116 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7086821629900126e-06, "loss": 0.4689, "step": 12117 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7071319384837613e-06, "loss": 0.4225, "step": 12118 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.705581964318581e-06, "loss": 0.3844, "step": 12119 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7040322405561323e-06, "loss": 0.4151, "step": 12120 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.7024827672580642e-06, "loss": 0.3907, "step": 12121 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.700933544486023e-06, "loss": 0.4047, "step": 12122 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6993845723016365e-06, "loss": 0.5386, "step": 12123 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6978358507665257e-06, "loss": 0.3994, "step": 12124 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.696287379942304e-06, "loss": 0.4865, "step": 12125 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6947391598905614e-06, "loss": 0.4035, "step": 12126 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6931911906729013e-06, "loss": 0.3993, "step": 12127 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6916434723508987e-06, "loss": 0.4134, "step": 12128 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6900960049861267e-06, "loss": 0.4404, "step": 12129 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.6885487886401437e-06, "loss": 0.4988, "step": 12130 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.687001823374503e-06, "loss": 0.3886, "step": 12131 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 3.685455109250742e-06, "loss": 0.4192, "step": 12132 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6839086463304e-06, "loss": 0.458, "step": 12133 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6823624346749874e-06, "loss": 0.4948, "step": 12134 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6808164743460217e-06, "loss": 0.4507, "step": 12135 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.679270765405002e-06, "loss": 0.3973, "step": 12136 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6777253079134145e-06, "loss": 0.444, "step": 12137 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.676180101932749e-06, "loss": 0.4702, "step": 12138 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6746351475244723e-06, "loss": 0.4242, "step": 12139 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6730904447500495e-06, "loss": 0.4444, "step": 12140 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.671545993670923e-06, "loss": 0.4057, "step": 12141 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6700017943485354e-06, "loss": 0.3976, "step": 12142 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.668457846844323e-06, "loss": 0.4419, "step": 12143 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.666914151219705e-06, "loss": 0.4526, "step": 12144 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.66537070753609e-06, "loss": 0.4741, "step": 12145 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6638275158548806e-06, "loss": 0.4842, "step": 12146 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6622845762374683e-06, "loss": 0.4364, "step": 12147 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6607418887452317e-06, "loss": 0.3866, "step": 12148 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6591994534395424e-06, "loss": 0.5189, "step": 12149 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6576572703817602e-06, "loss": 0.4118, "step": 12150 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6561153396332383e-06, "loss": 0.3578, "step": 12151 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6545736612553163e-06, "loss": 0.4018, "step": 12152 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.653032235309323e-06, "loss": 0.4871, "step": 12153 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.651491061856577e-06, "loss": 0.4007, "step": 12154 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6499501409583993e-06, "loss": 0.4858, "step": 12155 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.64840947267608e-06, "loss": 0.404, "step": 12156 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6468690570709132e-06, "loss": 0.521, "step": 12157 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6453288942041786e-06, "loss": 0.4922, "step": 12158 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.643788984137143e-06, "loss": 0.4266, "step": 12159 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.642249326931073e-06, "loss": 0.5222, "step": 12160 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6407099226472174e-06, "loss": 0.3602, "step": 12161 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.639170771346815e-06, "loss": 0.4245, "step": 12162 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6376318730910997e-06, "loss": 0.4736, "step": 12163 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6360932279412797e-06, "loss": 0.5958, "step": 12164 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.634554835958578e-06, "loss": 0.4585, "step": 12165 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6330166972041892e-06, "loss": 0.4081, "step": 12166 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.631478811739303e-06, "loss": 0.4611, "step": 12167 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6299411796250995e-06, "loss": 0.4297, "step": 12168 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6284038009227485e-06, "loss": 0.4326, "step": 12169 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.626866675693409e-06, "loss": 0.4103, "step": 12170 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.625329803998232e-06, "loss": 0.4517, "step": 12171 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6237931858983556e-06, "loss": 0.4283, "step": 12172 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.622256821454908e-06, "loss": 0.3842, "step": 12173 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.620720710729011e-06, "loss": 0.4329, "step": 12174 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6191848537817686e-06, "loss": 0.3717, "step": 12175 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.617649250674288e-06, "loss": 0.4249, "step": 12176 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6161139014676527e-06, "loss": 0.4019, "step": 12177 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.614578806222946e-06, "loss": 0.4526, "step": 12178 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6130439650012304e-06, "loss": 0.4516, "step": 12179 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6115093778635633e-06, "loss": 0.4426, "step": 12180 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6099750448710013e-06, "loss": 0.476, "step": 12181 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6084409660845777e-06, "loss": 0.4475, "step": 12182 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.606907141565322e-06, "loss": 0.4326, "step": 12183 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6053735713742532e-06, "loss": 0.4897, "step": 12184 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6038402555723784e-06, "loss": 0.4481, "step": 12185 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.6023071942206943e-06, "loss": 0.4121, "step": 12186 }, { "epoch": 2.2, "grad_norm": 0.0, "learning_rate": 3.60077438738019e-06, "loss": 0.4304, "step": 12187 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.599241835111844e-06, "loss": 0.4665, "step": 12188 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5977095374766225e-06, "loss": 0.5273, "step": 12189 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.596177494535483e-06, "loss": 0.395, "step": 12190 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5946457063493734e-06, "loss": 0.4374, "step": 12191 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5931141729792274e-06, "loss": 0.4349, "step": 12192 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5915828944859822e-06, "loss": 0.4012, "step": 12193 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.590051870930543e-06, "loss": 0.4582, "step": 12194 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5885211023738233e-06, "loss": 0.4156, "step": 12195 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5869905888767154e-06, "loss": 0.462, "step": 12196 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5854603305001057e-06, "loss": 0.45, "step": 12197 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5839303273048765e-06, "loss": 0.4254, "step": 12198 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.582400579351889e-06, "loss": 0.5379, "step": 12199 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5808710867020012e-06, "loss": 0.4831, "step": 12200 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5793418494160616e-06, "loss": 0.4912, "step": 12201 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.577812867554896e-06, "loss": 0.4282, "step": 12202 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5762841411793392e-06, "loss": 0.5159, "step": 12203 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.574755670350204e-06, "loss": 0.3558, "step": 12204 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5732274551282943e-06, "loss": 0.4009, "step": 12205 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5716994955744078e-06, "loss": 0.3159, "step": 12206 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.570171791749326e-06, "loss": 0.4291, "step": 12207 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5686443437138265e-06, "loss": 0.4507, "step": 12208 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.567117151528672e-06, "loss": 0.4268, "step": 12209 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5655902152546172e-06, "loss": 0.3637, "step": 12210 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5640635349524068e-06, "loss": 0.4282, "step": 12211 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5625371106827733e-06, "loss": 0.3884, "step": 12212 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.561010942506439e-06, "loss": 0.405, "step": 12213 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5594850304841235e-06, "loss": 0.4745, "step": 12214 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5579593746765264e-06, "loss": 0.4272, "step": 12215 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.556433975144344e-06, "loss": 0.3944, "step": 12216 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.554908831948255e-06, "loss": 0.4085, "step": 12217 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.553383945148928e-06, "loss": 0.2998, "step": 12218 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5518593148070367e-06, "loss": 0.461, "step": 12219 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5503349409832268e-06, "loss": 0.5093, "step": 12220 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5488108237381424e-06, "loss": 0.4785, "step": 12221 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5472869631324147e-06, "loss": 0.428, "step": 12222 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.545763359226666e-06, "loss": 0.4365, "step": 12223 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.544240012081508e-06, "loss": 0.4572, "step": 12224 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.542716921757541e-06, "loss": 0.3981, "step": 12225 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5411940883153574e-06, "loss": 0.4559, "step": 12226 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5396715118155367e-06, "loss": 0.4573, "step": 12227 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5381491923186506e-06, "loss": 0.4168, "step": 12228 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.536627129885256e-06, "loss": 0.4771, "step": 12229 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5351053245759105e-06, "loss": 0.3998, "step": 12230 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5335837764511526e-06, "loss": 0.4815, "step": 12231 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.532062485571507e-06, "loss": 0.4122, "step": 12232 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5305414519974957e-06, "loss": 0.3902, "step": 12233 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.529020675789625e-06, "loss": 0.3605, "step": 12234 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5275001570084e-06, "loss": 0.4952, "step": 12235 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5259798957143078e-06, "loss": 0.3798, "step": 12236 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.524459891967825e-06, "loss": 0.3875, "step": 12237 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.522940145829421e-06, "loss": 0.4391, "step": 12238 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.521420657359554e-06, "loss": 0.4485, "step": 12239 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5199014266186727e-06, "loss": 0.4643, "step": 12240 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5183824536672128e-06, "loss": 0.4122, "step": 12241 }, { "epoch": 2.21, "grad_norm": 0.0, "learning_rate": 3.5168637385656026e-06, "loss": 0.4459, "step": 12242 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.5153452813742584e-06, "loss": 0.336, "step": 12243 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.513827082153588e-06, "loss": 0.457, "step": 12244 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.5123091409639886e-06, "loss": 0.395, "step": 12245 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.5107914578658453e-06, "loss": 0.4728, "step": 12246 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.509274032919534e-06, "loss": 0.4375, "step": 12247 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.507756866185421e-06, "loss": 0.3847, "step": 12248 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.5062399577238616e-06, "loss": 0.4157, "step": 12249 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.504723307595201e-06, "loss": 0.3786, "step": 12250 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.5032069158597714e-06, "loss": 0.4999, "step": 12251 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.5016907825779036e-06, "loss": 0.4195, "step": 12252 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.500174907809909e-06, "loss": 0.4133, "step": 12253 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.498659291616093e-06, "loss": 0.4566, "step": 12254 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.497143934056747e-06, "loss": 0.4392, "step": 12255 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4956288351921496e-06, "loss": 0.5293, "step": 12256 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4941139950825855e-06, "loss": 0.3641, "step": 12257 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.49259941378831e-06, "loss": 0.3619, "step": 12258 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.491085091369578e-06, "loss": 0.4025, "step": 12259 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4895710278866314e-06, "loss": 0.4069, "step": 12260 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.488057223399702e-06, "loss": 0.4377, "step": 12261 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4865436779690132e-06, "loss": 0.4506, "step": 12262 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4850303916547746e-06, "loss": 0.3844, "step": 12263 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.483517364517187e-06, "loss": 0.4232, "step": 12264 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.482004596616442e-06, "loss": 0.4305, "step": 12265 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4804920880127204e-06, "loss": 0.4153, "step": 12266 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4789798387661887e-06, "loss": 0.4372, "step": 12267 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.477467848937014e-06, "loss": 0.3696, "step": 12268 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.475956118585345e-06, "loss": 0.4678, "step": 12269 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4744446477713146e-06, "loss": 0.4524, "step": 12270 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4729334365550547e-06, "loss": 0.4883, "step": 12271 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4714224849966805e-06, "loss": 0.398, "step": 12272 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.469911793156309e-06, "loss": 0.3705, "step": 12273 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4684013610940315e-06, "loss": 0.4073, "step": 12274 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4668911888699363e-06, "loss": 0.4704, "step": 12275 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4653812765441066e-06, "loss": 0.3164, "step": 12276 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.463871624176597e-06, "loss": 0.3974, "step": 12277 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.462362231827474e-06, "loss": 0.4182, "step": 12278 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4608530995567814e-06, "loss": 0.4321, "step": 12279 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4593442274245546e-06, "loss": 0.4299, "step": 12280 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4578356154908186e-06, "loss": 0.4009, "step": 12281 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.45632726381559e-06, "loss": 0.4022, "step": 12282 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.454819172458873e-06, "loss": 0.4663, "step": 12283 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4533113414806607e-06, "loss": 0.4708, "step": 12284 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4518037709409393e-06, "loss": 0.4238, "step": 12285 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4502964608996812e-06, "loss": 0.4088, "step": 12286 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.448789411416851e-06, "loss": 0.4224, "step": 12287 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.447282622552399e-06, "loss": 0.403, "step": 12288 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4457760943662676e-06, "loss": 0.4509, "step": 12289 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.444269826918395e-06, "loss": 0.4346, "step": 12290 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4427638202686987e-06, "loss": 0.4947, "step": 12291 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.441258074477094e-06, "loss": 0.4829, "step": 12292 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4397525896034758e-06, "loss": 0.3557, "step": 12293 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4382473657077343e-06, "loss": 0.3945, "step": 12294 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4367424028497565e-06, "loss": 0.4755, "step": 12295 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.43523770108941e-06, "loss": 0.4333, "step": 12296 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.4337332604865537e-06, "loss": 0.4165, "step": 12297 }, { "epoch": 2.22, "grad_norm": 0.0, "learning_rate": 3.432229081101036e-06, "loss": 0.3967, "step": 12298 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4307251629926985e-06, "loss": 0.4568, "step": 12299 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.429221506221366e-06, "loss": 0.4225, "step": 12300 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4277181108468594e-06, "loss": 0.4853, "step": 12301 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4262149769289855e-06, "loss": 0.4494, "step": 12302 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.42471210452754e-06, "loss": 0.4228, "step": 12303 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.423209493702313e-06, "loss": 0.4065, "step": 12304 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4217071445130744e-06, "loss": 0.4619, "step": 12305 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4202050570195977e-06, "loss": 0.4422, "step": 12306 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4187032312816405e-06, "loss": 0.3947, "step": 12307 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.417201667358938e-06, "loss": 0.4103, "step": 12308 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.415700365311232e-06, "loss": 0.4507, "step": 12309 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4141993251982407e-06, "loss": 0.4437, "step": 12310 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4126985470796858e-06, "loss": 0.3611, "step": 12311 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.411198031015267e-06, "loss": 0.4959, "step": 12312 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4096977770646778e-06, "loss": 0.4243, "step": 12313 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4081977852876047e-06, "loss": 0.4645, "step": 12314 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.406698055743709e-06, "loss": 0.4348, "step": 12315 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.405198588492664e-06, "loss": 0.4318, "step": 12316 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.403699383594116e-06, "loss": 0.4045, "step": 12317 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.4022004411077058e-06, "loss": 0.451, "step": 12318 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.400701761093066e-06, "loss": 0.5267, "step": 12319 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3992033436098137e-06, "loss": 0.4184, "step": 12320 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3977051887175583e-06, "loss": 0.4905, "step": 12321 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3962072964759076e-06, "loss": 0.4596, "step": 12322 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.394709666944439e-06, "loss": 0.4005, "step": 12323 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3932123001827365e-06, "loss": 0.3942, "step": 12324 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3917151962503657e-06, "loss": 0.5216, "step": 12325 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3902183552068822e-06, "loss": 0.4647, "step": 12326 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3887217771118396e-06, "loss": 0.4024, "step": 12327 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3872254620247704e-06, "loss": 0.4743, "step": 12328 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.385729410005203e-06, "loss": 0.4281, "step": 12329 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.384233621112648e-06, "loss": 0.4651, "step": 12330 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3827380954066093e-06, "loss": 0.4005, "step": 12331 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3812428329465884e-06, "loss": 0.4321, "step": 12332 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3797478337920662e-06, "loss": 0.4169, "step": 12333 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3782530980025163e-06, "loss": 0.4318, "step": 12334 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3767586256374018e-06, "loss": 0.4627, "step": 12335 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3752644167561766e-06, "loss": 0.4768, "step": 12336 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3737704714182805e-06, "loss": 0.5042, "step": 12337 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3722767896831463e-06, "loss": 0.4305, "step": 12338 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3707833716101957e-06, "loss": 0.423, "step": 12339 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3692902172588394e-06, "loss": 0.43, "step": 12340 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.367797326688477e-06, "loss": 0.3705, "step": 12341 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.366304699958498e-06, "loss": 0.4255, "step": 12342 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3648123371282795e-06, "loss": 0.4247, "step": 12343 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.363320238257196e-06, "loss": 0.3939, "step": 12344 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3618284034046056e-06, "loss": 0.4979, "step": 12345 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3603368326298503e-06, "loss": 0.3898, "step": 12346 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3588455259922705e-06, "loss": 0.4414, "step": 12347 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3573544835511885e-06, "loss": 0.5102, "step": 12348 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3558637053659283e-06, "loss": 0.3121, "step": 12349 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3543731914957924e-06, "loss": 0.4052, "step": 12350 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3528829420000752e-06, "loss": 0.4312, "step": 12351 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3513929569380653e-06, "loss": 0.4109, "step": 12352 }, { "epoch": 2.23, "grad_norm": 0.0, "learning_rate": 3.3499032363690266e-06, "loss": 0.4433, "step": 12353 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.348413780352232e-06, "loss": 0.3978, "step": 12354 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3469245889469328e-06, "loss": 0.4311, "step": 12355 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3454356622123695e-06, "loss": 0.3864, "step": 12356 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.343947000207777e-06, "loss": 0.4198, "step": 12357 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3424586029923746e-06, "loss": 0.4002, "step": 12358 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.340970470625371e-06, "loss": 0.4408, "step": 12359 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3394826031659753e-06, "loss": 0.4156, "step": 12360 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3379950006733697e-06, "loss": 0.4123, "step": 12361 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.336507663206735e-06, "loss": 0.4308, "step": 12362 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3350205908252407e-06, "loss": 0.4001, "step": 12363 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3335337835880422e-06, "loss": 0.4519, "step": 12364 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3320472415542926e-06, "loss": 0.475, "step": 12365 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.330560964783127e-06, "loss": 0.5407, "step": 12366 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3290749533336743e-06, "loss": 0.3711, "step": 12367 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.327589207265045e-06, "loss": 0.4147, "step": 12368 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.326103726636345e-06, "loss": 0.4064, "step": 12369 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.324618511506674e-06, "loss": 0.391, "step": 12370 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3231335619351147e-06, "loss": 0.349, "step": 12371 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.32164887798074e-06, "loss": 0.4502, "step": 12372 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3201644597026138e-06, "loss": 0.4351, "step": 12373 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3186803071597884e-06, "loss": 0.4155, "step": 12374 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.317196420411306e-06, "loss": 0.4009, "step": 12375 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3157127995162e-06, "loss": 0.3815, "step": 12376 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3142294445334877e-06, "loss": 0.3968, "step": 12377 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3127463555221816e-06, "loss": 0.4304, "step": 12378 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3112635325412822e-06, "loss": 0.4349, "step": 12379 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.309780975649778e-06, "loss": 0.4279, "step": 12380 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3082986849066434e-06, "loss": 0.4565, "step": 12381 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3068166603708574e-06, "loss": 0.3747, "step": 12382 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.305334902101368e-06, "loss": 0.4814, "step": 12383 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.303853410157124e-06, "loss": 0.4173, "step": 12384 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.3023721845970623e-06, "loss": 0.4031, "step": 12385 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.300891225480106e-06, "loss": 0.3596, "step": 12386 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.299410532865177e-06, "loss": 0.4732, "step": 12387 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2979301068111746e-06, "loss": 0.3905, "step": 12388 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2964499473769952e-06, "loss": 0.356, "step": 12389 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2949700546215225e-06, "loss": 0.3682, "step": 12390 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.293490428603622e-06, "loss": 0.4684, "step": 12391 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2920110693821637e-06, "loss": 0.5267, "step": 12392 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.290531977015996e-06, "loss": 0.3829, "step": 12393 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2890531515639613e-06, "loss": 0.4434, "step": 12394 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.287574593084888e-06, "loss": 0.4327, "step": 12395 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.286096301637597e-06, "loss": 0.4607, "step": 12396 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.284618277280893e-06, "loss": 0.4603, "step": 12397 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2831405200735843e-06, "loss": 0.4427, "step": 12398 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.28166303007445e-06, "loss": 0.4808, "step": 12399 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.280185807342269e-06, "loss": 0.369, "step": 12400 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.278708851935808e-06, "loss": 0.4128, "step": 12401 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2772321639138194e-06, "loss": 0.4933, "step": 12402 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2757557433350563e-06, "loss": 0.4309, "step": 12403 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.274279590258248e-06, "loss": 0.4109, "step": 12404 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2728037047421234e-06, "loss": 0.4804, "step": 12405 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.271328086845389e-06, "loss": 0.4599, "step": 12406 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2698527366267462e-06, "loss": 0.4277, "step": 12407 }, { "epoch": 2.24, "grad_norm": 0.0, "learning_rate": 3.2683776541448943e-06, "loss": 0.3728, "step": 12408 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.266902839458511e-06, "loss": 0.4085, "step": 12409 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.265428292626267e-06, "loss": 0.4272, "step": 12410 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2639540137068227e-06, "loss": 0.3569, "step": 12411 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.262480002758828e-06, "loss": 0.3976, "step": 12412 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2610062598409196e-06, "loss": 0.3783, "step": 12413 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2595327850117277e-06, "loss": 0.4922, "step": 12414 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2580595783298676e-06, "loss": 0.3397, "step": 12415 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.256586639853948e-06, "loss": 0.4414, "step": 12416 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2551139696425636e-06, "loss": 0.4728, "step": 12417 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2536415677542966e-06, "loss": 0.4331, "step": 12418 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2521694342477294e-06, "loss": 0.4966, "step": 12419 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2506975691814248e-06, "loss": 0.4381, "step": 12420 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2492259726139286e-06, "loss": 0.3982, "step": 12421 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2477546446037887e-06, "loss": 0.4018, "step": 12422 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2462835852095333e-06, "loss": 0.4917, "step": 12423 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.24481279448969e-06, "loss": 0.432, "step": 12424 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2433422725027653e-06, "loss": 0.3728, "step": 12425 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2418720193072604e-06, "loss": 0.4508, "step": 12426 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.240402034961664e-06, "loss": 0.4799, "step": 12427 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2389323195244536e-06, "loss": 0.4438, "step": 12428 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.237462873054099e-06, "loss": 0.4006, "step": 12429 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2359936956090554e-06, "loss": 0.4224, "step": 12430 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.23452478724777e-06, "loss": 0.521, "step": 12431 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.233056148028678e-06, "loss": 0.3922, "step": 12432 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2315877780102068e-06, "loss": 0.3702, "step": 12433 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.230119677250767e-06, "loss": 0.4464, "step": 12434 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2286518458087613e-06, "loss": 0.4092, "step": 12435 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2271842837425917e-06, "loss": 0.4301, "step": 12436 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.22571699111063e-06, "loss": 0.3944, "step": 12437 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2242499679712513e-06, "loss": 0.4312, "step": 12438 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2227832143828163e-06, "loss": 0.4439, "step": 12439 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.221316730403673e-06, "loss": 0.4049, "step": 12440 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2198505160921643e-06, "loss": 0.4312, "step": 12441 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2183845715066185e-06, "loss": 0.5206, "step": 12442 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2169188967053544e-06, "loss": 0.4031, "step": 12443 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.215453491746674e-06, "loss": 0.3923, "step": 12444 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2139883566888717e-06, "loss": 0.4595, "step": 12445 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2125234915902426e-06, "loss": 0.4277, "step": 12446 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2110588965090562e-06, "loss": 0.5245, "step": 12447 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.209594571503576e-06, "loss": 0.3599, "step": 12448 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2081305166320577e-06, "loss": 0.3613, "step": 12449 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.206666731952742e-06, "loss": 0.3393, "step": 12450 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.205203217523861e-06, "loss": 0.3598, "step": 12451 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2037399734036366e-06, "loss": 0.4656, "step": 12452 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2022769996502777e-06, "loss": 0.373, "step": 12453 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.2008142963219857e-06, "loss": 0.4085, "step": 12454 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.1993518634769492e-06, "loss": 0.4452, "step": 12455 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.1978897011733425e-06, "loss": 0.4221, "step": 12456 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.196427809469338e-06, "loss": 0.3981, "step": 12457 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.1949661884230943e-06, "loss": 0.471, "step": 12458 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.19350483809275e-06, "loss": 0.4445, "step": 12459 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.192043758536444e-06, "loss": 0.5365, "step": 12460 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.1905829498122964e-06, "loss": 0.4116, "step": 12461 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.1891224119784283e-06, "loss": 0.4303, "step": 12462 }, { "epoch": 2.25, "grad_norm": 0.0, "learning_rate": 3.1876621450929367e-06, "loss": 0.4117, "step": 12463 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.186202149213916e-06, "loss": 0.3814, "step": 12464 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1847424243994463e-06, "loss": 0.4303, "step": 12465 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1832829707075985e-06, "loss": 0.3975, "step": 12466 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.181823788196431e-06, "loss": 0.3856, "step": 12467 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1803648769239937e-06, "loss": 0.4398, "step": 12468 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.178906236948325e-06, "loss": 0.4598, "step": 12469 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1774478683274514e-06, "loss": 0.4607, "step": 12470 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1759897711193876e-06, "loss": 0.482, "step": 12471 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1745319453821423e-06, "loss": 0.3718, "step": 12472 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1730743911737092e-06, "loss": 0.4317, "step": 12473 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1716171085520707e-06, "loss": 0.3935, "step": 12474 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1701600975752024e-06, "loss": 0.4914, "step": 12475 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.168703358301065e-06, "loss": 0.4285, "step": 12476 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.167246890787611e-06, "loss": 0.386, "step": 12477 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1657906950927773e-06, "loss": 0.4544, "step": 12478 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.164334771274501e-06, "loss": 0.3218, "step": 12479 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1628791193906973e-06, "loss": 0.4178, "step": 12480 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1614237394992787e-06, "loss": 0.4468, "step": 12481 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1599686316581345e-06, "loss": 0.4668, "step": 12482 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.158513795925153e-06, "loss": 0.462, "step": 12483 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.157059232358217e-06, "loss": 0.3472, "step": 12484 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.155604941015187e-06, "loss": 0.4491, "step": 12485 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1541509219539166e-06, "loss": 0.4746, "step": 12486 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.152697175232251e-06, "loss": 0.4468, "step": 12487 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.151243700908021e-06, "loss": 0.4493, "step": 12488 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1497904990390506e-06, "loss": 0.4165, "step": 12489 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1483375696831475e-06, "loss": 0.4661, "step": 12490 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.146884912898114e-06, "loss": 0.4151, "step": 12491 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1454325287417385e-06, "loss": 0.3588, "step": 12492 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1439804172718e-06, "loss": 0.3966, "step": 12493 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1425285785460623e-06, "loss": 0.4138, "step": 12494 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1410770126222888e-06, "loss": 0.4372, "step": 12495 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1396257195582246e-06, "loss": 0.4297, "step": 12496 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.138174699411598e-06, "loss": 0.4137, "step": 12497 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1367239522401373e-06, "loss": 0.4822, "step": 12498 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1352734781015525e-06, "loss": 0.4086, "step": 12499 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1338232770535516e-06, "loss": 0.3826, "step": 12500 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.132373349153822e-06, "loss": 0.4405, "step": 12501 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1309236944600475e-06, "loss": 0.4474, "step": 12502 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1294743130298945e-06, "loss": 0.3763, "step": 12503 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1280252049210224e-06, "loss": 0.4347, "step": 12504 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.126576370191081e-06, "loss": 0.3414, "step": 12505 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1251278088977067e-06, "loss": 0.4623, "step": 12506 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1236795210985247e-06, "loss": 0.3743, "step": 12507 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.122231506851152e-06, "loss": 0.4272, "step": 12508 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1207837662131923e-06, "loss": 0.3471, "step": 12509 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.119336299242235e-06, "loss": 0.365, "step": 12510 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1178891059958736e-06, "loss": 0.4317, "step": 12511 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1164421865316706e-06, "loss": 0.3922, "step": 12512 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.11499554090719e-06, "loss": 0.4192, "step": 12513 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.11354916917998e-06, "loss": 0.4166, "step": 12514 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1121030714075796e-06, "loss": 0.4439, "step": 12515 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1106572476475204e-06, "loss": 0.4054, "step": 12516 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1092116979573185e-06, "loss": 0.4696, "step": 12517 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.1077664223944803e-06, "loss": 0.3652, "step": 12518 }, { "epoch": 2.26, "grad_norm": 0.0, "learning_rate": 3.106321421016504e-06, "loss": 0.4621, "step": 12519 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.104876693880867e-06, "loss": 0.4592, "step": 12520 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.1034322410450445e-06, "loss": 0.3804, "step": 12521 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.1019880625665056e-06, "loss": 0.4701, "step": 12522 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.100544158502697e-06, "loss": 0.4289, "step": 12523 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.099100528911063e-06, "loss": 0.4415, "step": 12524 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0976571738490315e-06, "loss": 0.449, "step": 12525 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0962140933740212e-06, "loss": 0.3619, "step": 12526 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0947712875434412e-06, "loss": 0.4875, "step": 12527 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0933287564146885e-06, "loss": 0.3606, "step": 12528 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0918865000451504e-06, "loss": 0.3654, "step": 12529 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0904445184922015e-06, "loss": 0.4388, "step": 12530 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0890028118132064e-06, "loss": 0.4245, "step": 12531 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0875613800655146e-06, "loss": 0.3503, "step": 12532 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.086120223306477e-06, "loss": 0.4962, "step": 12533 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.084679341593424e-06, "loss": 0.4583, "step": 12534 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.08323873498367e-06, "loss": 0.3751, "step": 12535 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0817984035345283e-06, "loss": 0.4357, "step": 12536 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0803583473032937e-06, "loss": 0.3754, "step": 12537 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.078918566347262e-06, "loss": 0.4023, "step": 12538 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.077479060723707e-06, "loss": 0.3944, "step": 12539 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.076039830489893e-06, "loss": 0.3644, "step": 12540 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0746008757030755e-06, "loss": 0.5252, "step": 12541 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0731621964204995e-06, "loss": 0.3843, "step": 12542 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.071723792699398e-06, "loss": 0.3893, "step": 12543 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0702856645969924e-06, "loss": 0.4361, "step": 12544 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0688478121704946e-06, "loss": 0.4491, "step": 12545 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.067410235477104e-06, "loss": 0.402, "step": 12546 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0659729345740107e-06, "loss": 0.3958, "step": 12547 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0645359095183903e-06, "loss": 0.3905, "step": 12548 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0630991603674178e-06, "loss": 0.3804, "step": 12549 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0616626871782417e-06, "loss": 0.4349, "step": 12550 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0602264900080092e-06, "loss": 0.3985, "step": 12551 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0587905689138566e-06, "loss": 0.3655, "step": 12552 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.057354923952902e-06, "loss": 0.4144, "step": 12553 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.055919555182265e-06, "loss": 0.5216, "step": 12554 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.054484462659044e-06, "loss": 0.4193, "step": 12555 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.05304964644033e-06, "loss": 0.4216, "step": 12556 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0516151065832056e-06, "loss": 0.4061, "step": 12557 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0501808431447275e-06, "loss": 0.3853, "step": 12558 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0487468561819654e-06, "loss": 0.411, "step": 12559 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0473131457519613e-06, "loss": 0.4147, "step": 12560 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.045879711911751e-06, "loss": 0.4062, "step": 12561 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0444465547183586e-06, "loss": 0.4165, "step": 12562 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0430136742287973e-06, "loss": 0.4834, "step": 12563 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0415810705000713e-06, "loss": 0.3944, "step": 12564 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0401487435891697e-06, "loss": 0.3892, "step": 12565 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0387166935530745e-06, "loss": 0.394, "step": 12566 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0372849204487543e-06, "loss": 0.3754, "step": 12567 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.035853424333168e-06, "loss": 0.4056, "step": 12568 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.034422205263262e-06, "loss": 0.4389, "step": 12569 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0329912632959713e-06, "loss": 0.4524, "step": 12570 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.031560598488226e-06, "loss": 0.3615, "step": 12571 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.030130210896941e-06, "loss": 0.4968, "step": 12572 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.028700100579013e-06, "loss": 0.4441, "step": 12573 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 3.0272702675913366e-06, "loss": 0.4576, "step": 12574 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0258407119907917e-06, "loss": 0.3867, "step": 12575 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0244114338342534e-06, "loss": 0.4009, "step": 12576 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0229824331785785e-06, "loss": 0.436, "step": 12577 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0215537100806147e-06, "loss": 0.4581, "step": 12578 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0201252645972025e-06, "loss": 0.4934, "step": 12579 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.018697096785157e-06, "loss": 0.405, "step": 12580 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0172692067013054e-06, "loss": 0.3953, "step": 12581 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0158415944024456e-06, "loss": 0.4067, "step": 12582 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.014414259945372e-06, "loss": 0.3851, "step": 12583 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.012987203386866e-06, "loss": 0.4249, "step": 12584 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0115604247836983e-06, "loss": 0.5166, "step": 12585 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0101339241926263e-06, "loss": 0.3711, "step": 12586 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.008707701670407e-06, "loss": 0.4174, "step": 12587 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.007281757273769e-06, "loss": 0.4809, "step": 12588 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0058560910594413e-06, "loss": 0.3961, "step": 12589 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0044307030841414e-06, "loss": 0.3608, "step": 12590 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0030055934045677e-06, "loss": 0.4202, "step": 12591 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.001580762077422e-06, "loss": 0.3875, "step": 12592 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 3.0001562091593815e-06, "loss": 0.4654, "step": 12593 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.99873193470712e-06, "loss": 0.5499, "step": 12594 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9973079387772974e-06, "loss": 0.4167, "step": 12595 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.995884221426556e-06, "loss": 0.4404, "step": 12596 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9944607827115413e-06, "loss": 0.4207, "step": 12597 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.993037622688879e-06, "loss": 0.4237, "step": 12598 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9916147414151832e-06, "loss": 0.4351, "step": 12599 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.990192138947059e-06, "loss": 0.4472, "step": 12600 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9887698153410993e-06, "loss": 0.3915, "step": 12601 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.987347770653888e-06, "loss": 0.4556, "step": 12602 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.985926004941996e-06, "loss": 0.3861, "step": 12603 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9845045182619827e-06, "loss": 0.4975, "step": 12604 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.983083310670397e-06, "loss": 0.3983, "step": 12605 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.981662382223779e-06, "loss": 0.3709, "step": 12606 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9802417329786547e-06, "loss": 0.4929, "step": 12607 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9788213629915363e-06, "loss": 0.4681, "step": 12608 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.977401272318935e-06, "loss": 0.45, "step": 12609 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9759814610173442e-06, "loss": 0.4005, "step": 12610 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9745619291432415e-06, "loss": 0.4678, "step": 12611 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9731426767531e-06, "loss": 0.4747, "step": 12612 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9717237039033775e-06, "loss": 0.4912, "step": 12613 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9703050106505293e-06, "loss": 0.423, "step": 12614 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9688865970509906e-06, "loss": 0.5117, "step": 12615 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.967468463161187e-06, "loss": 0.4677, "step": 12616 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.96605060903754e-06, "loss": 0.5154, "step": 12617 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9646330347364415e-06, "loss": 0.4877, "step": 12618 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.963215740314298e-06, "loss": 0.4332, "step": 12619 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.961798725827486e-06, "loss": 0.3914, "step": 12620 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9603819913323783e-06, "loss": 0.4289, "step": 12621 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9589655368853344e-06, "loss": 0.4204, "step": 12622 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.957549362542703e-06, "loss": 0.4956, "step": 12623 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.95613346836082e-06, "loss": 0.5133, "step": 12624 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9547178543960197e-06, "loss": 0.4833, "step": 12625 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9533025207046094e-06, "loss": 0.4287, "step": 12626 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9518874673428976e-06, "loss": 0.4277, "step": 12627 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.950472694367176e-06, "loss": 0.4076, "step": 12628 }, { "epoch": 2.28, "grad_norm": 0.0, "learning_rate": 2.9490582018337233e-06, "loss": 0.4848, "step": 12629 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9476439897988175e-06, "loss": 0.411, "step": 12630 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9462300583187155e-06, "loss": 0.4323, "step": 12631 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9448164074496687e-06, "loss": 0.4396, "step": 12632 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9434030372479083e-06, "loss": 0.4503, "step": 12633 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.94198994776966e-06, "loss": 0.426, "step": 12634 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.940577139071147e-06, "loss": 0.4539, "step": 12635 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.939164611208568e-06, "loss": 0.4896, "step": 12636 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9377523642381167e-06, "loss": 0.3524, "step": 12637 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9363403982159743e-06, "loss": 0.4502, "step": 12638 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.934928713198312e-06, "loss": 0.4234, "step": 12639 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9335173092412896e-06, "loss": 0.4288, "step": 12640 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9321061864010524e-06, "loss": 0.4468, "step": 12641 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9306953447337404e-06, "loss": 0.3505, "step": 12642 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9292847842954776e-06, "loss": 0.4472, "step": 12643 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9278745051423783e-06, "loss": 0.4833, "step": 12644 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.926464507330543e-06, "loss": 0.405, "step": 12645 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.925054790916072e-06, "loss": 0.4015, "step": 12646 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9236453559550404e-06, "loss": 0.3814, "step": 12647 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.922236202503522e-06, "loss": 0.462, "step": 12648 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9208273306175695e-06, "loss": 0.4586, "step": 12649 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9194187403532303e-06, "loss": 0.4625, "step": 12650 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.918010431766546e-06, "loss": 0.4349, "step": 12651 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.916602404913539e-06, "loss": 0.4324, "step": 12652 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9151946598502223e-06, "loss": 0.4129, "step": 12653 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9137871966326003e-06, "loss": 0.4024, "step": 12654 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9123800153166615e-06, "loss": 0.5067, "step": 12655 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9109731159583887e-06, "loss": 0.4593, "step": 12656 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9095664986137485e-06, "loss": 0.4049, "step": 12657 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9081601633387e-06, "loss": 0.4068, "step": 12658 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9067541101891895e-06, "loss": 0.3837, "step": 12659 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.905348339221151e-06, "loss": 0.4704, "step": 12660 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.903942850490511e-06, "loss": 0.4311, "step": 12661 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9025376440531762e-06, "loss": 0.4005, "step": 12662 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.9011327199650584e-06, "loss": 0.4694, "step": 12663 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8997280782820403e-06, "loss": 0.4608, "step": 12664 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.898323719060002e-06, "loss": 0.3761, "step": 12665 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.896919642354812e-06, "loss": 0.3739, "step": 12666 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8955158482223232e-06, "loss": 0.4501, "step": 12667 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8941123367183888e-06, "loss": 0.3733, "step": 12668 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.892709107898838e-06, "loss": 0.3617, "step": 12669 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.891306161819497e-06, "loss": 0.3701, "step": 12670 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.889903498536173e-06, "loss": 0.4412, "step": 12671 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.888501118104664e-06, "loss": 0.4602, "step": 12672 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.887099020580767e-06, "loss": 0.461, "step": 12673 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.885697206020256e-06, "loss": 0.4038, "step": 12674 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.884295674478896e-06, "loss": 0.3723, "step": 12675 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8828944260124457e-06, "loss": 0.4683, "step": 12676 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8814934606766466e-06, "loss": 0.4336, "step": 12677 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8800927785272327e-06, "loss": 0.3932, "step": 12678 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8786923796199253e-06, "loss": 0.4813, "step": 12679 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.877292264010434e-06, "loss": 0.4058, "step": 12680 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.875892431754459e-06, "loss": 0.4051, "step": 12681 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.874492882907687e-06, "loss": 0.4674, "step": 12682 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8730936175257916e-06, "loss": 0.353, "step": 12683 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8716946356644437e-06, "loss": 0.3111, "step": 12684 }, { "epoch": 2.29, "grad_norm": 0.0, "learning_rate": 2.8702959373792995e-06, "loss": 0.3974, "step": 12685 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.868897522725992e-06, "loss": 0.429, "step": 12686 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.867499391760158e-06, "loss": 0.5291, "step": 12687 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.866101544537413e-06, "loss": 0.4607, "step": 12688 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8647039811133735e-06, "loss": 0.3999, "step": 12689 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.863306701543633e-06, "loss": 0.3553, "step": 12690 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8619097058837777e-06, "loss": 0.4236, "step": 12691 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8605129941893816e-06, "loss": 0.4238, "step": 12692 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8591165665160104e-06, "loss": 0.3603, "step": 12693 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8577204229192145e-06, "loss": 0.4015, "step": 12694 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.856324563454537e-06, "loss": 0.4262, "step": 12695 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8549289881775043e-06, "loss": 0.437, "step": 12696 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8535336971436378e-06, "loss": 0.391, "step": 12697 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8521386904084424e-06, "loss": 0.4947, "step": 12698 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.850743968027415e-06, "loss": 0.4142, "step": 12699 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8493495300560383e-06, "loss": 0.3762, "step": 12700 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8479553765497913e-06, "loss": 0.3788, "step": 12701 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8465615075641297e-06, "loss": 0.3926, "step": 12702 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.845167923154506e-06, "loss": 0.4186, "step": 12703 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8437746233763585e-06, "loss": 0.397, "step": 12704 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.842381608285113e-06, "loss": 0.3018, "step": 12705 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8409888779361914e-06, "loss": 0.3929, "step": 12706 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.839596432384997e-06, "loss": 0.4265, "step": 12707 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8382042716869252e-06, "loss": 0.4031, "step": 12708 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8368123958973538e-06, "loss": 0.4806, "step": 12709 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8354208050716535e-06, "loss": 0.3804, "step": 12710 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8340294992651906e-06, "loss": 0.4341, "step": 12711 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8326384785333095e-06, "loss": 0.4452, "step": 12712 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.831247742931349e-06, "loss": 0.4437, "step": 12713 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.829857292514633e-06, "loss": 0.4252, "step": 12714 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8284671273384777e-06, "loss": 0.4772, "step": 12715 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8270772474581853e-06, "loss": 0.4334, "step": 12716 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8256876529290478e-06, "loss": 0.4158, "step": 12717 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8242983438063455e-06, "loss": 0.4816, "step": 12718 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8229093201453484e-06, "loss": 0.4735, "step": 12719 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8215205820013137e-06, "loss": 0.3961, "step": 12720 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8201321294294837e-06, "loss": 0.4414, "step": 12721 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.818743962485101e-06, "loss": 0.2729, "step": 12722 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.817356081223389e-06, "loss": 0.4164, "step": 12723 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.815968485699553e-06, "loss": 0.4943, "step": 12724 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.814581175968798e-06, "loss": 0.3965, "step": 12725 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8131941520863092e-06, "loss": 0.4382, "step": 12726 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.811807414107273e-06, "loss": 0.5069, "step": 12727 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8104209620868516e-06, "loss": 0.384, "step": 12728 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8090347960802e-06, "loss": 0.4105, "step": 12729 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8076489161424638e-06, "loss": 0.4215, "step": 12730 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.806263322328775e-06, "loss": 0.3746, "step": 12731 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8048780146942546e-06, "loss": 0.4675, "step": 12732 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8034929932940126e-06, "loss": 0.4698, "step": 12733 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.802108258183147e-06, "loss": 0.4709, "step": 12734 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.8007238094167466e-06, "loss": 0.4817, "step": 12735 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.7993396470498847e-06, "loss": 0.3936, "step": 12736 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.797955771137625e-06, "loss": 0.4596, "step": 12737 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.7965721817350288e-06, "loss": 0.5021, "step": 12738 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.795188878897128e-06, "loss": 0.4837, "step": 12739 }, { "epoch": 2.3, "grad_norm": 0.0, "learning_rate": 2.7938058626789568e-06, "loss": 0.4002, "step": 12740 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.792423133135532e-06, "loss": 0.4027, "step": 12741 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7910406903218603e-06, "loss": 0.5144, "step": 12742 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7896585342929416e-06, "loss": 0.3805, "step": 12743 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7882766651037594e-06, "loss": 0.434, "step": 12744 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7868950828092855e-06, "loss": 0.4135, "step": 12745 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7855137874644845e-06, "loss": 0.3731, "step": 12746 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.784132779124299e-06, "loss": 0.3684, "step": 12747 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.782752057843675e-06, "loss": 0.3702, "step": 12748 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.781371623677539e-06, "loss": 0.4446, "step": 12749 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7799914766808045e-06, "loss": 0.4304, "step": 12750 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7786116169083786e-06, "loss": 0.5186, "step": 12751 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7772320444151533e-06, "loss": 0.414, "step": 12752 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7758527592560113e-06, "loss": 0.4782, "step": 12753 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.774473761485822e-06, "loss": 0.3272, "step": 12754 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7730950511594435e-06, "loss": 0.4132, "step": 12755 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7717166283317255e-06, "loss": 0.3665, "step": 12756 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.770338493057502e-06, "loss": 0.4851, "step": 12757 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7689606453915985e-06, "loss": 0.3748, "step": 12758 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.767583085388824e-06, "loss": 0.4467, "step": 12759 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.766205813103989e-06, "loss": 0.3493, "step": 12760 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7648288285918814e-06, "loss": 0.4395, "step": 12761 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.763452131907274e-06, "loss": 0.3619, "step": 12762 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.762075723104938e-06, "loss": 0.4995, "step": 12763 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.760699602239625e-06, "loss": 0.3768, "step": 12764 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7593237693660878e-06, "loss": 0.3851, "step": 12765 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7579482245390542e-06, "loss": 0.4279, "step": 12766 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.756572967813247e-06, "loss": 0.4198, "step": 12767 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7551979992433754e-06, "loss": 0.43, "step": 12768 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.753823318884139e-06, "loss": 0.4007, "step": 12769 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7524489267902245e-06, "loss": 0.3696, "step": 12770 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7510748230163063e-06, "loss": 0.4226, "step": 12771 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7497010076170504e-06, "loss": 0.4587, "step": 12772 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7483274806471096e-06, "loss": 0.4176, "step": 12773 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.746954242161124e-06, "loss": 0.3932, "step": 12774 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7455812922137203e-06, "loss": 0.4215, "step": 12775 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7442086308595264e-06, "loss": 0.434, "step": 12776 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.74283625815314e-06, "loss": 0.4598, "step": 12777 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7414641741491597e-06, "loss": 0.394, "step": 12778 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.740092378902169e-06, "loss": 0.388, "step": 12779 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.738720872466737e-06, "loss": 0.4361, "step": 12780 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.737349654897432e-06, "loss": 0.4395, "step": 12781 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.735978726248798e-06, "loss": 0.4023, "step": 12782 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7346080865753754e-06, "loss": 0.3771, "step": 12783 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.733237735931692e-06, "loss": 0.4842, "step": 12784 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.731867674372254e-06, "loss": 0.5305, "step": 12785 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7304979019515744e-06, "loss": 0.4545, "step": 12786 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7291284187241405e-06, "loss": 0.3885, "step": 12787 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.727759224744435e-06, "loss": 0.3637, "step": 12788 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7263903200669252e-06, "loss": 0.4194, "step": 12789 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.725021704746068e-06, "loss": 0.4044, "step": 12790 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7236533788363116e-06, "loss": 0.5209, "step": 12791 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.722285342392088e-06, "loss": 0.3928, "step": 12792 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7209175954678214e-06, "loss": 0.3971, "step": 12793 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.7195501381179213e-06, "loss": 0.3438, "step": 12794 }, { "epoch": 2.31, "grad_norm": 0.0, "learning_rate": 2.71818297039679e-06, "loss": 0.4797, "step": 12795 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7168160923588136e-06, "loss": 0.3902, "step": 12796 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7154495040583685e-06, "loss": 0.4053, "step": 12797 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7140832055498234e-06, "loss": 0.3603, "step": 12798 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.712717196887532e-06, "loss": 0.439, "step": 12799 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.711351478125832e-06, "loss": 0.4854, "step": 12800 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7099860493190566e-06, "loss": 0.4069, "step": 12801 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.708620910521522e-06, "loss": 0.4812, "step": 12802 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.707256061787541e-06, "loss": 0.4373, "step": 12803 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.705891503171406e-06, "loss": 0.4735, "step": 12804 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7045272347274044e-06, "loss": 0.3712, "step": 12805 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7031632565098053e-06, "loss": 0.3445, "step": 12806 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.7017995685728736e-06, "loss": 0.4701, "step": 12807 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.700436170970857e-06, "loss": 0.3751, "step": 12808 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6990730637579945e-06, "loss": 0.3992, "step": 12809 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6977102469885117e-06, "loss": 0.3772, "step": 12810 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6963477207166257e-06, "loss": 0.3291, "step": 12811 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6949854849965397e-06, "loss": 0.4696, "step": 12812 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6936235398824418e-06, "loss": 0.3828, "step": 12813 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.692261885428521e-06, "loss": 0.4123, "step": 12814 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6909005216889394e-06, "loss": 0.4133, "step": 12815 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.689539448717856e-06, "loss": 0.4046, "step": 12816 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6881786665694165e-06, "loss": 0.4131, "step": 12817 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.686818175297753e-06, "loss": 0.4457, "step": 12818 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6854579749569933e-06, "loss": 0.4268, "step": 12819 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.684098065601246e-06, "loss": 0.3723, "step": 12820 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6827384472846107e-06, "loss": 0.4871, "step": 12821 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.681379120061178e-06, "loss": 0.4324, "step": 12822 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6800200839850153e-06, "loss": 0.4559, "step": 12823 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6786613391101968e-06, "loss": 0.5147, "step": 12824 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6773028854907725e-06, "loss": 0.4158, "step": 12825 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6759447231807833e-06, "loss": 0.4599, "step": 12826 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.67458685223426e-06, "loss": 0.4323, "step": 12827 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.673229272705221e-06, "loss": 0.4807, "step": 12828 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6718719846476725e-06, "loss": 0.4414, "step": 12829 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.67051498811561e-06, "loss": 0.3882, "step": 12830 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.669158283163018e-06, "loss": 0.4151, "step": 12831 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.667801869843868e-06, "loss": 0.3767, "step": 12832 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6664457482121207e-06, "loss": 0.3661, "step": 12833 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6650899183217215e-06, "loss": 0.3983, "step": 12834 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6637343802266124e-06, "loss": 0.3386, "step": 12835 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6623791339807194e-06, "loss": 0.4185, "step": 12836 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6610241796379553e-06, "loss": 0.458, "step": 12837 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.65966951725222e-06, "loss": 0.4258, "step": 12838 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.658315146877404e-06, "loss": 0.3762, "step": 12839 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6569610685673896e-06, "loss": 0.3934, "step": 12840 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6556072823760436e-06, "loss": 0.4035, "step": 12841 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.654253788357223e-06, "loss": 0.5013, "step": 12842 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6529005865647684e-06, "loss": 0.4005, "step": 12843 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.651547677052516e-06, "loss": 0.4439, "step": 12844 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6501950598742854e-06, "loss": 0.4908, "step": 12845 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6488427350838864e-06, "loss": 0.3843, "step": 12846 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.647490702735117e-06, "loss": 0.4172, "step": 12847 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6461389628817626e-06, "loss": 0.5347, "step": 12848 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6447875155775983e-06, "loss": 0.4843, "step": 12849 }, { "epoch": 2.32, "grad_norm": 0.0, "learning_rate": 2.6434363608763858e-06, "loss": 0.401, "step": 12850 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6420854988318755e-06, "loss": 0.4856, "step": 12851 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6407349294978148e-06, "loss": 0.4128, "step": 12852 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.639384652927922e-06, "loss": 0.3969, "step": 12853 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.638034669175917e-06, "loss": 0.4071, "step": 12854 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.636684978295506e-06, "loss": 0.4408, "step": 12855 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6353355803403767e-06, "loss": 0.4023, "step": 12856 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6339864753642177e-06, "loss": 0.4425, "step": 12857 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6326376634206954e-06, "loss": 0.3957, "step": 12858 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6312891445634668e-06, "loss": 0.3404, "step": 12859 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6299409188461834e-06, "loss": 0.4559, "step": 12860 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.628592986322469e-06, "loss": 0.3481, "step": 12861 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.627245347045958e-06, "loss": 0.3946, "step": 12862 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6258980010702563e-06, "loss": 0.3791, "step": 12863 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.624550948448964e-06, "loss": 0.3879, "step": 12864 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.623204189235671e-06, "loss": 0.4636, "step": 12865 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6218577234839526e-06, "loss": 0.409, "step": 12866 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6205115512473724e-06, "loss": 0.4288, "step": 12867 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6191656725794856e-06, "loss": 0.5257, "step": 12868 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.617820087533831e-06, "loss": 0.4671, "step": 12869 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.61647479616394e-06, "loss": 0.4447, "step": 12870 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6151297985233313e-06, "loss": 0.4216, "step": 12871 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6137850946655076e-06, "loss": 0.3294, "step": 12872 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6124406846439678e-06, "loss": 0.4589, "step": 12873 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6110965685121937e-06, "loss": 0.3707, "step": 12874 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6097527463236584e-06, "loss": 0.4039, "step": 12875 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6084092181318166e-06, "loss": 0.4218, "step": 12876 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.607065983990116e-06, "loss": 0.4214, "step": 12877 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6057230439519974e-06, "loss": 0.3964, "step": 12878 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6043803980708838e-06, "loss": 0.4832, "step": 12879 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.603038046400187e-06, "loss": 0.3871, "step": 12880 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.601695988993309e-06, "loss": 0.4095, "step": 12881 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.6003542259036376e-06, "loss": 0.4275, "step": 12882 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5990127571845524e-06, "loss": 0.4675, "step": 12883 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5976715828894174e-06, "loss": 0.4394, "step": 12884 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.596330703071589e-06, "loss": 0.5377, "step": 12885 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5949901177844073e-06, "loss": 0.4352, "step": 12886 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.593649827081205e-06, "loss": 0.4009, "step": 12887 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5923098310152995e-06, "loss": 0.5269, "step": 12888 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.590970129639996e-06, "loss": 0.4466, "step": 12889 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5896307230085994e-06, "loss": 0.4232, "step": 12890 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.588291611174385e-06, "loss": 0.4943, "step": 12891 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.586952794190627e-06, "loss": 0.4634, "step": 12892 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5856142721105848e-06, "loss": 0.4827, "step": 12893 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.584276044987507e-06, "loss": 0.3939, "step": 12894 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5829381128746344e-06, "loss": 0.3896, "step": 12895 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.58160047582519e-06, "loss": 0.3746, "step": 12896 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.580263133892389e-06, "loss": 0.4012, "step": 12897 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.57892608712943e-06, "loss": 0.4046, "step": 12898 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5775893355895e-06, "loss": 0.3832, "step": 12899 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5762528793257846e-06, "loss": 0.3478, "step": 12900 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5749167183914482e-06, "loss": 0.3756, "step": 12901 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5735808528396432e-06, "loss": 0.4238, "step": 12902 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5722452827235155e-06, "loss": 0.4489, "step": 12903 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5709100080961934e-06, "loss": 0.4851, "step": 12904 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.569575029010799e-06, "loss": 0.3351, "step": 12905 }, { "epoch": 2.33, "grad_norm": 0.0, "learning_rate": 2.5682403455204386e-06, "loss": 0.3841, "step": 12906 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5669059576782085e-06, "loss": 0.4631, "step": 12907 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.565571865537193e-06, "loss": 0.3822, "step": 12908 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5642380691504655e-06, "loss": 0.4323, "step": 12909 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.562904568571082e-06, "loss": 0.4356, "step": 12910 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.561571363852098e-06, "loss": 0.3831, "step": 12911 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5602384550465478e-06, "loss": 0.3992, "step": 12912 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.55890584220746e-06, "loss": 0.337, "step": 12913 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.557573525387842e-06, "loss": 0.4966, "step": 12914 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.556241504640696e-06, "loss": 0.4615, "step": 12915 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5549097800190182e-06, "loss": 0.3992, "step": 12916 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.553578351575783e-06, "loss": 0.4714, "step": 12917 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.552247219363958e-06, "loss": 0.3251, "step": 12918 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.550916383436497e-06, "loss": 0.4121, "step": 12919 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5495858438463427e-06, "loss": 0.405, "step": 12920 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5482556006464277e-06, "loss": 0.4131, "step": 12921 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.546925653889669e-06, "loss": 0.3845, "step": 12922 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5455960036289773e-06, "loss": 0.3874, "step": 12923 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.544266649917245e-06, "loss": 0.4836, "step": 12924 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5429375928073594e-06, "loss": 0.478, "step": 12925 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.541608832352187e-06, "loss": 0.4745, "step": 12926 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.540280368604595e-06, "loss": 0.3632, "step": 12927 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5389522016174327e-06, "loss": 0.4826, "step": 12928 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5376243314435314e-06, "loss": 0.4075, "step": 12929 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5362967581357155e-06, "loss": 0.4757, "step": 12930 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5349694817467997e-06, "loss": 0.4043, "step": 12931 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5336425023295886e-06, "loss": 0.3803, "step": 12932 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.532315819936869e-06, "loss": 0.4334, "step": 12933 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5309894346214194e-06, "loss": 0.3832, "step": 12934 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5296633464360076e-06, "loss": 0.3988, "step": 12935 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.528337555433379e-06, "loss": 0.4957, "step": 12936 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5270120616662864e-06, "loss": 0.4658, "step": 12937 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5256868651874542e-06, "loss": 0.4089, "step": 12938 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.524361966049604e-06, "loss": 0.4099, "step": 12939 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5230373643054408e-06, "loss": 0.4371, "step": 12940 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.52171306000766e-06, "loss": 0.3828, "step": 12941 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.520389053208945e-06, "loss": 0.3961, "step": 12942 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.519065343961967e-06, "loss": 0.486, "step": 12943 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.517741932319384e-06, "loss": 0.3833, "step": 12944 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5164188183338466e-06, "loss": 0.4044, "step": 12945 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5150960020579874e-06, "loss": 0.4503, "step": 12946 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5137734835444317e-06, "loss": 0.4733, "step": 12947 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5124512628457888e-06, "loss": 0.4452, "step": 12948 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.511129340014664e-06, "loss": 0.4907, "step": 12949 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.509807715103646e-06, "loss": 0.3627, "step": 12950 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5084863881653065e-06, "loss": 0.3996, "step": 12951 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.507165359252212e-06, "loss": 0.3727, "step": 12952 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.505844628416911e-06, "loss": 0.466, "step": 12953 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5045241957119538e-06, "loss": 0.3526, "step": 12954 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5032040611898635e-06, "loss": 0.3974, "step": 12955 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5018842249031583e-06, "loss": 0.3816, "step": 12956 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.5005646869043443e-06, "loss": 0.3871, "step": 12957 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.499245447245914e-06, "loss": 0.4251, "step": 12958 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.4979265059803493e-06, "loss": 0.447, "step": 12959 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.49660786316012e-06, "loss": 0.3684, "step": 12960 }, { "epoch": 2.34, "grad_norm": 0.0, "learning_rate": 2.4952895188376838e-06, "loss": 0.4047, "step": 12961 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.493971473065486e-06, "loss": 0.487, "step": 12962 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4926537258959627e-06, "loss": 0.4151, "step": 12963 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.491336277381532e-06, "loss": 0.4544, "step": 12964 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.49001912757461e-06, "loss": 0.3668, "step": 12965 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4887022765275946e-06, "loss": 0.4185, "step": 12966 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.487385724292869e-06, "loss": 0.3894, "step": 12967 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4860694709228075e-06, "loss": 0.3832, "step": 12968 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4847535164697724e-06, "loss": 0.3999, "step": 12969 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4834378609861186e-06, "loss": 0.3966, "step": 12970 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.482122504524185e-06, "loss": 0.3605, "step": 12971 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4808074471362953e-06, "loss": 0.4059, "step": 12972 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4794926888747706e-06, "loss": 0.3846, "step": 12973 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4781782297919033e-06, "loss": 0.4131, "step": 12974 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4768640699399948e-06, "loss": 0.4314, "step": 12975 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4755502093713225e-06, "loss": 0.3912, "step": 12976 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.474236648138152e-06, "loss": 0.4719, "step": 12977 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4729233862927405e-06, "loss": 0.454, "step": 12978 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4716104238873305e-06, "loss": 0.3754, "step": 12979 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.470297760974155e-06, "loss": 0.4365, "step": 12980 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4689853976054336e-06, "loss": 0.3929, "step": 12981 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.467673333833375e-06, "loss": 0.4452, "step": 12982 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.466361569710174e-06, "loss": 0.3812, "step": 12983 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.465050105288016e-06, "loss": 0.4075, "step": 12984 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4637389406190727e-06, "loss": 0.5017, "step": 12985 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.462428075755502e-06, "loss": 0.3873, "step": 12986 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.461117510749458e-06, "loss": 0.4823, "step": 12987 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4598072456530764e-06, "loss": 0.3767, "step": 12988 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4584972805184783e-06, "loss": 0.4349, "step": 12989 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.457187615397776e-06, "loss": 0.4069, "step": 12990 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.45587825034307e-06, "loss": 0.441, "step": 12991 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4545691854064535e-06, "loss": 0.4088, "step": 12992 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4532604206400014e-06, "loss": 0.4902, "step": 12993 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.451951956095777e-06, "loss": 0.4653, "step": 12994 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.450643791825835e-06, "loss": 0.3872, "step": 12995 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.449335927882216e-06, "loss": 0.4353, "step": 12996 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.448028364316948e-06, "loss": 0.3959, "step": 12997 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.446721101182049e-06, "loss": 0.4078, "step": 12998 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.445414138529525e-06, "loss": 0.3784, "step": 12999 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.444107476411368e-06, "loss": 0.4882, "step": 13000 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4428011148795596e-06, "loss": 0.4442, "step": 13001 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.441495053986065e-06, "loss": 0.4471, "step": 13002 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.440189293782852e-06, "loss": 0.5042, "step": 13003 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.438883834321857e-06, "loss": 0.4992, "step": 13004 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4375786756550157e-06, "loss": 0.4263, "step": 13005 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4362738178342504e-06, "loss": 0.3934, "step": 13006 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4349692609114674e-06, "loss": 0.4157, "step": 13007 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4336650049385678e-06, "loss": 0.4531, "step": 13008 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4323610499674377e-06, "loss": 0.3934, "step": 13009 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4310573960499495e-06, "loss": 0.4222, "step": 13010 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.429754043237966e-06, "loss": 0.38, "step": 13011 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.42845099158333e-06, "loss": 0.3743, "step": 13012 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4271482411378876e-06, "loss": 0.4459, "step": 13013 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.4258457919534604e-06, "loss": 0.4019, "step": 13014 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.424543644081864e-06, "loss": 0.3659, "step": 13015 }, { "epoch": 2.35, "grad_norm": 0.0, "learning_rate": 2.423241797574898e-06, "loss": 0.3814, "step": 13016 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.421940252484353e-06, "loss": 0.4523, "step": 13017 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.420639008862007e-06, "loss": 0.4449, "step": 13018 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4193380667596243e-06, "loss": 0.3741, "step": 13019 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.418037426228962e-06, "loss": 0.4391, "step": 13020 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.416737087321758e-06, "loss": 0.4584, "step": 13021 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.415437050089743e-06, "loss": 0.4548, "step": 13022 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.414137314584636e-06, "loss": 0.4807, "step": 13023 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4128378808581386e-06, "loss": 0.3814, "step": 13024 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4115387489619512e-06, "loss": 0.385, "step": 13025 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4102399189477553e-06, "loss": 0.4315, "step": 13026 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4089413908672143e-06, "loss": 0.4564, "step": 13027 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4076431647719887e-06, "loss": 0.4041, "step": 13028 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.406345240713721e-06, "loss": 0.4062, "step": 13029 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.405047618744052e-06, "loss": 0.35, "step": 13030 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.4037502989146e-06, "loss": 0.4749, "step": 13031 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.402453281276973e-06, "loss": 0.3827, "step": 13032 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.401156565882771e-06, "loss": 0.3807, "step": 13033 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3998601527835774e-06, "loss": 0.4079, "step": 13034 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3985640420309663e-06, "loss": 0.3899, "step": 13035 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.397268233676501e-06, "loss": 0.3687, "step": 13036 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.395972727771728e-06, "loss": 0.4239, "step": 13037 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3946775243681873e-06, "loss": 0.3515, "step": 13038 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.393382623517404e-06, "loss": 0.3765, "step": 13039 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3920880252708867e-06, "loss": 0.4436, "step": 13040 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3907937296801476e-06, "loss": 0.4119, "step": 13041 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3894997367966655e-06, "loss": 0.4908, "step": 13042 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3882060466719216e-06, "loss": 0.4418, "step": 13043 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3869126593573812e-06, "loss": 0.3583, "step": 13044 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.385619574904493e-06, "loss": 0.37, "step": 13045 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3843267933647064e-06, "loss": 0.3844, "step": 13046 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.383034314789446e-06, "loss": 0.4203, "step": 13047 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.38174213923013e-06, "loss": 0.3727, "step": 13048 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3804502667381648e-06, "loss": 0.4215, "step": 13049 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.379158697364934e-06, "loss": 0.5914, "step": 13050 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.37786743116183e-06, "loss": 0.3851, "step": 13051 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.376576468180216e-06, "loss": 0.481, "step": 13052 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.37528580847145e-06, "loss": 0.3921, "step": 13053 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.373995452086877e-06, "loss": 0.4641, "step": 13054 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3727053990778283e-06, "loss": 0.4413, "step": 13055 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3714156494956254e-06, "loss": 0.5186, "step": 13056 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.370126203391576e-06, "loss": 0.4318, "step": 13057 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3688370608169775e-06, "loss": 0.4005, "step": 13058 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3675482218231137e-06, "loss": 0.4354, "step": 13059 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3662596864612562e-06, "loss": 0.3797, "step": 13060 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.364971454782663e-06, "loss": 0.4114, "step": 13061 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.363683526838588e-06, "loss": 0.5353, "step": 13062 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3623959026802634e-06, "loss": 0.4217, "step": 13063 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3611085823589177e-06, "loss": 0.4012, "step": 13064 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.359821565925755e-06, "loss": 0.4935, "step": 13065 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3585348534319763e-06, "loss": 0.4812, "step": 13066 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3572484449287746e-06, "loss": 0.4361, "step": 13067 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3559623404673214e-06, "loss": 0.4924, "step": 13068 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.3546765400987824e-06, "loss": 0.3485, "step": 13069 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.353391043874308e-06, "loss": 0.4248, "step": 13070 }, { "epoch": 2.36, "grad_norm": 0.0, "learning_rate": 2.352105851845037e-06, "loss": 0.3651, "step": 13071 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3508209640620962e-06, "loss": 0.4917, "step": 13072 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3495363805766015e-06, "loss": 0.4017, "step": 13073 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3482521014396552e-06, "loss": 0.4012, "step": 13074 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.346968126702349e-06, "loss": 0.44, "step": 13075 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3456844564157612e-06, "loss": 0.393, "step": 13076 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.344401090630959e-06, "loss": 0.466, "step": 13077 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.343118029398992e-06, "loss": 0.4212, "step": 13078 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3418352727709124e-06, "loss": 0.3946, "step": 13079 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3405528207977424e-06, "loss": 0.4435, "step": 13080 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3392706735305027e-06, "loss": 0.4611, "step": 13081 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3379888310201993e-06, "loss": 0.3838, "step": 13082 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3367072933178225e-06, "loss": 0.436, "step": 13083 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3354260604743615e-06, "loss": 0.4257, "step": 13084 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3341451325407815e-06, "loss": 0.4147, "step": 13085 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3328645095680402e-06, "loss": 0.3222, "step": 13086 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3315841916070856e-06, "loss": 0.3735, "step": 13087 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.330304178708843e-06, "loss": 0.3894, "step": 13088 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3290244709242426e-06, "loss": 0.5001, "step": 13089 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.327745068304189e-06, "loss": 0.3681, "step": 13090 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.326465970899581e-06, "loss": 0.4513, "step": 13091 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.325187178761301e-06, "loss": 0.4248, "step": 13092 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.323908691940224e-06, "loss": 0.4464, "step": 13093 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.322630510487208e-06, "loss": 0.4395, "step": 13094 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3213526344531036e-06, "loss": 0.4344, "step": 13095 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.320075063888745e-06, "loss": 0.5174, "step": 13096 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.318797798844956e-06, "loss": 0.4934, "step": 13097 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3175208393725502e-06, "loss": 0.4912, "step": 13098 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.316244185522323e-06, "loss": 0.4351, "step": 13099 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.314967837345068e-06, "loss": 0.3421, "step": 13100 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3136917948915573e-06, "loss": 0.4904, "step": 13101 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3124160582125575e-06, "loss": 0.3733, "step": 13102 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.311140627358813e-06, "loss": 0.4584, "step": 13103 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3098655023810646e-06, "loss": 0.4248, "step": 13104 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.308590683330042e-06, "loss": 0.3733, "step": 13105 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3073161702564595e-06, "loss": 0.435, "step": 13106 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3060419632110165e-06, "loss": 0.4922, "step": 13107 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3047680622444058e-06, "loss": 0.3833, "step": 13108 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3034944674073034e-06, "loss": 0.4847, "step": 13109 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.302221178750377e-06, "loss": 0.4194, "step": 13110 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.3009481963242787e-06, "loss": 0.4506, "step": 13111 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.2996755201796497e-06, "loss": 0.5336, "step": 13112 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.29840315036712e-06, "loss": 0.4126, "step": 13113 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.2971310869373075e-06, "loss": 0.4735, "step": 13114 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.295859329940815e-06, "loss": 0.3847, "step": 13115 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.294587879428234e-06, "loss": 0.4068, "step": 13116 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.293316735450153e-06, "loss": 0.4229, "step": 13117 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.292045898057131e-06, "loss": 0.457, "step": 13118 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.2907753672997292e-06, "loss": 0.4614, "step": 13119 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.289505143228489e-06, "loss": 0.4112, "step": 13120 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.28823522589394e-06, "loss": 0.4611, "step": 13121 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.286965615346608e-06, "loss": 0.4741, "step": 13122 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.285696311636997e-06, "loss": 0.3732, "step": 13123 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.2844273148156015e-06, "loss": 0.437, "step": 13124 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.2831586249329075e-06, "loss": 0.4323, "step": 13125 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.2818902420393774e-06, "loss": 0.4224, "step": 13126 }, { "epoch": 2.37, "grad_norm": 0.0, "learning_rate": 2.280622166185479e-06, "loss": 0.4716, "step": 13127 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.279354397421655e-06, "loss": 0.4796, "step": 13128 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2780869357983383e-06, "loss": 0.4172, "step": 13129 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.276819781365952e-06, "loss": 0.456, "step": 13130 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.275552934174906e-06, "loss": 0.4044, "step": 13131 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2742863942755965e-06, "loss": 0.3574, "step": 13132 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2730201617184087e-06, "loss": 0.5083, "step": 13133 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.271754236553716e-06, "loss": 0.5424, "step": 13134 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2704886188318796e-06, "loss": 0.494, "step": 13135 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2692233086032468e-06, "loss": 0.4738, "step": 13136 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.267958305918152e-06, "loss": 0.4191, "step": 13137 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2666936108269245e-06, "loss": 0.3321, "step": 13138 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2654292233798724e-06, "loss": 0.3878, "step": 13139 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.264165143627299e-06, "loss": 0.367, "step": 13140 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.262901371619486e-06, "loss": 0.4596, "step": 13141 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2616379074067063e-06, "loss": 0.4403, "step": 13142 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2603747510392316e-06, "loss": 0.4706, "step": 13143 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2591119025673082e-06, "loss": 0.4536, "step": 13144 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2578493620411733e-06, "loss": 0.429, "step": 13145 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2565871295110543e-06, "loss": 0.4067, "step": 13146 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2553252050271636e-06, "loss": 0.4068, "step": 13147 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2540635886397045e-06, "loss": 0.4586, "step": 13148 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2528022803988658e-06, "loss": 0.4248, "step": 13149 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.251541280354823e-06, "loss": 0.4081, "step": 13150 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2502805885577426e-06, "loss": 0.4511, "step": 13151 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.249020205057776e-06, "loss": 0.4711, "step": 13152 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.247760129905061e-06, "loss": 0.4177, "step": 13153 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.246500363149732e-06, "loss": 0.47, "step": 13154 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2452409048419023e-06, "loss": 0.4179, "step": 13155 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2439817550316723e-06, "loss": 0.3547, "step": 13156 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2427229137691353e-06, "loss": 0.4164, "step": 13157 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.241464381104366e-06, "loss": 0.3915, "step": 13158 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.240206157087438e-06, "loss": 0.365, "step": 13159 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2389482417684014e-06, "loss": 0.327, "step": 13160 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2376906351973004e-06, "loss": 0.3563, "step": 13161 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2364333374241666e-06, "loss": 0.455, "step": 13162 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2351763484990074e-06, "loss": 0.4357, "step": 13163 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.233919668471839e-06, "loss": 0.4045, "step": 13164 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.232663297392649e-06, "loss": 0.4181, "step": 13165 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.23140723531142e-06, "loss": 0.4236, "step": 13166 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.230151482278119e-06, "loss": 0.4565, "step": 13167 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.228896038342703e-06, "loss": 0.4155, "step": 13168 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.227640903555115e-06, "loss": 0.3871, "step": 13169 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.226386077965287e-06, "loss": 0.3925, "step": 13170 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2251315616231373e-06, "loss": 0.3412, "step": 13171 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.223877354578573e-06, "loss": 0.4059, "step": 13172 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.22262345688149e-06, "loss": 0.4229, "step": 13173 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2213698685817676e-06, "loss": 0.4595, "step": 13174 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2201165897292755e-06, "loss": 0.4222, "step": 13175 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2188636203738755e-06, "loss": 0.3667, "step": 13176 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2176109605654107e-06, "loss": 0.3986, "step": 13177 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2163586103537172e-06, "loss": 0.4354, "step": 13178 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2151065697886086e-06, "loss": 0.3758, "step": 13179 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2138548389198955e-06, "loss": 0.4748, "step": 13180 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2126034177973767e-06, "loss": 0.3535, "step": 13181 }, { "epoch": 2.38, "grad_norm": 0.0, "learning_rate": 2.2113523064708352e-06, "loss": 0.4269, "step": 13182 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.210101504990041e-06, "loss": 0.3852, "step": 13183 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.208851013404755e-06, "loss": 0.3808, "step": 13184 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.2076008317647212e-06, "loss": 0.4325, "step": 13185 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.2063509601196766e-06, "loss": 0.4916, "step": 13186 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.2051013985193414e-06, "loss": 0.3683, "step": 13187 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.2038521470134254e-06, "loss": 0.4538, "step": 13188 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.202603205651628e-06, "loss": 0.4802, "step": 13189 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.2013545744836316e-06, "loss": 0.3976, "step": 13190 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.200106253559108e-06, "loss": 0.3647, "step": 13191 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1988582429277215e-06, "loss": 0.4576, "step": 13192 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1976105426391214e-06, "loss": 0.4068, "step": 13193 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1963631527429373e-06, "loss": 0.3874, "step": 13194 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1951160732887964e-06, "loss": 0.3982, "step": 13195 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.193869304326305e-06, "loss": 0.4059, "step": 13196 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.192622845905068e-06, "loss": 0.3953, "step": 13197 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1913766980746688e-06, "loss": 0.4975, "step": 13198 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1901308608846816e-06, "loss": 0.4611, "step": 13199 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1888853343846706e-06, "loss": 0.3781, "step": 13200 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.187640118624177e-06, "loss": 0.4095, "step": 13201 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1863952136527454e-06, "loss": 0.4258, "step": 13202 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.185150619519897e-06, "loss": 0.4001, "step": 13203 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1839063362751456e-06, "loss": 0.3819, "step": 13204 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.182662363967989e-06, "loss": 0.5287, "step": 13205 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.181418702647916e-06, "loss": 0.4152, "step": 13206 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.180175352364401e-06, "loss": 0.3472, "step": 13207 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.178932313166906e-06, "loss": 0.3901, "step": 13208 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.177689585104882e-06, "loss": 0.4655, "step": 13209 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1764471682277668e-06, "loss": 0.4405, "step": 13210 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1752050625849854e-06, "loss": 0.4663, "step": 13211 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1739632682259516e-06, "loss": 0.3936, "step": 13212 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1727217852000625e-06, "loss": 0.4073, "step": 13213 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1714806135567134e-06, "loss": 0.4255, "step": 13214 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.170239753345279e-06, "loss": 0.421, "step": 13215 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1689992046151166e-06, "loss": 0.3875, "step": 13216 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.167758967415582e-06, "loss": 0.3705, "step": 13217 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.16651904179601e-06, "loss": 0.449, "step": 13218 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.165279427805733e-06, "loss": 0.4253, "step": 13219 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1640401254940613e-06, "loss": 0.4783, "step": 13220 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.162801134910298e-06, "loss": 0.406, "step": 13221 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1615624561037306e-06, "loss": 0.4153, "step": 13222 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1603240891236375e-06, "loss": 0.4265, "step": 13223 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.159086034019282e-06, "loss": 0.3927, "step": 13224 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1578482908399157e-06, "loss": 0.4093, "step": 13225 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.15661085963478e-06, "loss": 0.4928, "step": 13226 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1553737404530993e-06, "loss": 0.4381, "step": 13227 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.154136933344091e-06, "loss": 0.4051, "step": 13228 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.152900438356953e-06, "loss": 0.4696, "step": 13229 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1516642555408806e-06, "loss": 0.4277, "step": 13230 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1504283849450523e-06, "loss": 0.4266, "step": 13231 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.149192826618628e-06, "loss": 0.4471, "step": 13232 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.147957580610761e-06, "loss": 0.392, "step": 13233 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1467226469705904e-06, "loss": 0.3722, "step": 13234 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1454880257472488e-06, "loss": 0.4529, "step": 13235 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1442537169898492e-06, "loss": 0.3994, "step": 13236 }, { "epoch": 2.39, "grad_norm": 0.0, "learning_rate": 2.1430197207474947e-06, "loss": 0.3892, "step": 13237 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1417860370692776e-06, "loss": 0.4617, "step": 13238 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1405526660042685e-06, "loss": 0.3994, "step": 13239 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.139319607601542e-06, "loss": 0.3353, "step": 13240 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.138086861910147e-06, "loss": 0.3814, "step": 13241 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.136854428979125e-06, "loss": 0.4296, "step": 13242 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1356223088575035e-06, "loss": 0.4289, "step": 13243 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.134390501594299e-06, "loss": 0.4485, "step": 13244 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.133159007238512e-06, "loss": 0.4366, "step": 13245 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1319278258391427e-06, "loss": 0.4596, "step": 13246 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1306969574451595e-06, "loss": 0.4239, "step": 13247 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1294664021055335e-06, "loss": 0.5199, "step": 13248 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1282361598692157e-06, "loss": 0.4749, "step": 13249 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.127006230785147e-06, "loss": 0.4161, "step": 13250 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1257766149022596e-06, "loss": 0.3539, "step": 13251 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.124547312269468e-06, "loss": 0.4675, "step": 13252 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.123318322935678e-06, "loss": 0.4835, "step": 13253 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1220896469497767e-06, "loss": 0.4369, "step": 13254 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1208612843606413e-06, "loss": 0.5171, "step": 13255 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1196332352171445e-06, "loss": 0.424, "step": 13256 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.118405499568138e-06, "loss": 0.356, "step": 13257 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.117178077462463e-06, "loss": 0.3816, "step": 13258 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1159509689489464e-06, "loss": 0.4421, "step": 13259 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.114724174076408e-06, "loss": 0.4517, "step": 13260 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.11349769289365e-06, "loss": 0.3967, "step": 13261 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.112271525449463e-06, "loss": 0.4279, "step": 13262 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.111045671792629e-06, "loss": 0.4715, "step": 13263 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1098201319719114e-06, "loss": 0.3878, "step": 13264 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1085949060360654e-06, "loss": 0.4509, "step": 13265 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.107369994033833e-06, "loss": 0.4513, "step": 13266 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1061453960139413e-06, "loss": 0.4306, "step": 13267 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1049211120251123e-06, "loss": 0.4051, "step": 13268 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1036971421160447e-06, "loss": 0.3692, "step": 13269 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.102473486335432e-06, "loss": 0.4262, "step": 13270 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1012501447319535e-06, "loss": 0.4106, "step": 13271 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1000271173542718e-06, "loss": 0.4319, "step": 13272 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.098804404251049e-06, "loss": 0.3597, "step": 13273 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.097582005470922e-06, "loss": 0.4647, "step": 13274 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0963599210625197e-06, "loss": 0.4303, "step": 13275 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0951381510744627e-06, "loss": 0.3982, "step": 13276 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.093916695555346e-06, "loss": 0.4202, "step": 13277 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.09269555455377e-06, "loss": 0.5238, "step": 13278 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.091474728118311e-06, "loss": 0.4562, "step": 13279 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.090254216297535e-06, "loss": 0.363, "step": 13280 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0890340191399973e-06, "loss": 0.4566, "step": 13281 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0878141366942373e-06, "loss": 0.4466, "step": 13282 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.086594569008783e-06, "loss": 0.4425, "step": 13283 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.085375316132159e-06, "loss": 0.4065, "step": 13284 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0841563781128606e-06, "loss": 0.3945, "step": 13285 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.082937754999381e-06, "loss": 0.4331, "step": 13286 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.081719446840201e-06, "loss": 0.4388, "step": 13287 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.080501453683784e-06, "loss": 0.3807, "step": 13288 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0792837755785877e-06, "loss": 0.3651, "step": 13289 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.078066412573053e-06, "loss": 0.4504, "step": 13290 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0768493647156095e-06, "loss": 0.4224, "step": 13291 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0756326320546682e-06, "loss": 0.388, "step": 13292 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.0744162146386338e-06, "loss": 0.3556, "step": 13293 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.073200112515902e-06, "loss": 0.4453, "step": 13294 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0719843257348494e-06, "loss": 0.414, "step": 13295 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.070768854343842e-06, "loss": 0.4013, "step": 13296 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0695536983912323e-06, "loss": 0.3511, "step": 13297 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0683388579253627e-06, "loss": 0.4315, "step": 13298 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0671243329945614e-06, "loss": 0.452, "step": 13299 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0659101236471433e-06, "loss": 0.3686, "step": 13300 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0646962299314123e-06, "loss": 0.4451, "step": 13301 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0634826518956595e-06, "loss": 0.4835, "step": 13302 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.062269389588163e-06, "loss": 0.3379, "step": 13303 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.061056443057188e-06, "loss": 0.4279, "step": 13304 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0598438123509845e-06, "loss": 0.501, "step": 13305 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.058631497517802e-06, "loss": 0.4359, "step": 13306 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0574194986058617e-06, "loss": 0.4761, "step": 13307 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.056207815663378e-06, "loss": 0.4472, "step": 13308 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0549964487385577e-06, "loss": 0.4349, "step": 13309 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0537853978795853e-06, "loss": 0.4594, "step": 13310 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0525746631346454e-06, "loss": 0.4716, "step": 13311 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0513642445519e-06, "loss": 0.3886, "step": 13312 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0501541421795025e-06, "loss": 0.3931, "step": 13313 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.048944356065593e-06, "loss": 0.4063, "step": 13314 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0477348862582947e-06, "loss": 0.4542, "step": 13315 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0465257328057265e-06, "loss": 0.4527, "step": 13316 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.045316895755991e-06, "loss": 0.5128, "step": 13317 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0441083751571755e-06, "loss": 0.4216, "step": 13318 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0429001710573592e-06, "loss": 0.445, "step": 13319 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0416922835046037e-06, "loss": 0.3116, "step": 13320 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.040484712546964e-06, "loss": 0.4155, "step": 13321 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.039277458232477e-06, "loss": 0.4315, "step": 13322 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.038070520609171e-06, "loss": 0.3617, "step": 13323 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.036863899725059e-06, "loss": 0.4251, "step": 13324 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.035657595628142e-06, "loss": 0.441, "step": 13325 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0344516083664078e-06, "loss": 0.4205, "step": 13326 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0332459379878356e-06, "loss": 0.4372, "step": 13327 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.032040584540389e-06, "loss": 0.4135, "step": 13328 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0308355480720197e-06, "loss": 0.4527, "step": 13329 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.029630828630663e-06, "loss": 0.463, "step": 13330 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0284264262642415e-06, "loss": 0.4233, "step": 13331 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0272223410206772e-06, "loss": 0.4061, "step": 13332 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0260185729478652e-06, "loss": 0.465, "step": 13333 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0248151220936942e-06, "loss": 0.4206, "step": 13334 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.023611988506041e-06, "loss": 0.4106, "step": 13335 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0224091722327665e-06, "loss": 0.41, "step": 13336 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0212066733217215e-06, "loss": 0.4094, "step": 13337 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.020004491820744e-06, "loss": 0.421, "step": 13338 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0188026277776575e-06, "loss": 0.4221, "step": 13339 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0176010812402757e-06, "loss": 0.3516, "step": 13340 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0163998522563975e-06, "loss": 0.4825, "step": 13341 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.015198940873807e-06, "loss": 0.4134, "step": 13342 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0139983471402836e-06, "loss": 0.376, "step": 13343 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.01279807110359e-06, "loss": 0.3802, "step": 13344 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0115981128114693e-06, "loss": 0.4489, "step": 13345 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0103984723116597e-06, "loss": 0.3843, "step": 13346 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.0091991496518837e-06, "loss": 0.3744, "step": 13347 }, { "epoch": 2.41, "grad_norm": 0.0, "learning_rate": 2.008000144879857e-06, "loss": 0.4688, "step": 13348 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 2.006801458043276e-06, "loss": 0.4678, "step": 13349 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 2.005603089189826e-06, "loss": 0.3875, "step": 13350 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 2.004405038367181e-06, "loss": 0.3829, "step": 13351 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 2.0032073056229994e-06, "loss": 0.3972, "step": 13352 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 2.0020098910049315e-06, "loss": 0.4586, "step": 13353 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 2.0008127945606105e-06, "loss": 0.4175, "step": 13354 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.999616016337661e-06, "loss": 0.418, "step": 13355 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9984195563836907e-06, "loss": 0.4511, "step": 13356 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.997223414746299e-06, "loss": 0.4626, "step": 13357 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9960275914730686e-06, "loss": 0.4019, "step": 13358 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9948320866115723e-06, "loss": 0.3617, "step": 13359 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.993636900209369e-06, "loss": 0.4383, "step": 13360 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.992442032314006e-06, "loss": 0.4148, "step": 13361 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9912474829730166e-06, "loss": 0.5056, "step": 13362 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.990053252233921e-06, "loss": 0.3838, "step": 13363 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.988859340144227e-06, "loss": 0.4432, "step": 13364 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.987665746751436e-06, "loss": 0.3813, "step": 13365 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.986472472103026e-06, "loss": 0.4957, "step": 13366 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9852795162464723e-06, "loss": 0.3412, "step": 13367 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9840868792292267e-06, "loss": 0.4508, "step": 13368 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9828945610987337e-06, "loss": 0.3995, "step": 13369 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.981702561902432e-06, "loss": 0.3851, "step": 13370 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9805108816877394e-06, "loss": 0.4311, "step": 13371 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.979319520502062e-06, "loss": 0.3785, "step": 13372 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9781284783927932e-06, "loss": 0.3966, "step": 13373 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9769377554073165e-06, "loss": 0.4401, "step": 13374 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9757473515930005e-06, "loss": 0.3572, "step": 13375 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9745572669972004e-06, "loss": 0.4711, "step": 13376 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9733675016672606e-06, "loss": 0.446, "step": 13377 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.972178055650512e-06, "loss": 0.3565, "step": 13378 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.970988928994274e-06, "loss": 0.4841, "step": 13379 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.969800121745846e-06, "loss": 0.3997, "step": 13380 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9686116339525307e-06, "loss": 0.3926, "step": 13381 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.967423465661604e-06, "loss": 0.4028, "step": 13382 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.96623561692033e-06, "loss": 0.4613, "step": 13383 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.965048087775967e-06, "loss": 0.4513, "step": 13384 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9638608782757517e-06, "loss": 0.4159, "step": 13385 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.962673988466921e-06, "loss": 0.4734, "step": 13386 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9614874183966868e-06, "loss": 0.4535, "step": 13387 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.960301168112254e-06, "loss": 0.3807, "step": 13388 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9591152376608137e-06, "loss": 0.4324, "step": 13389 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9579296270895444e-06, "loss": 0.4925, "step": 13390 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9567443364456106e-06, "loss": 0.4057, "step": 13391 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.955559365776166e-06, "loss": 0.5106, "step": 13392 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.954374715128352e-06, "loss": 0.4092, "step": 13393 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9531903845492937e-06, "loss": 0.4217, "step": 13394 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9520063740861074e-06, "loss": 0.415, "step": 13395 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9508226837858935e-06, "loss": 0.4494, "step": 13396 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9496393136957424e-06, "loss": 0.4531, "step": 13397 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9484562638627313e-06, "loss": 0.422, "step": 13398 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.947273534333923e-06, "loss": 0.3924, "step": 13399 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9460911251563684e-06, "loss": 0.4674, "step": 13400 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.944909036377105e-06, "loss": 0.3846, "step": 13401 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9437272680431584e-06, "loss": 0.396, "step": 13402 }, { "epoch": 2.42, "grad_norm": 0.0, "learning_rate": 1.9425458202015437e-06, "loss": 0.4113, "step": 13403 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9413646928992593e-06, "loss": 0.3991, "step": 13404 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9401838861832957e-06, "loss": 0.3606, "step": 13405 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.939003400100622e-06, "loss": 0.4269, "step": 13406 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9378232346981996e-06, "loss": 0.4315, "step": 13407 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.936643390022982e-06, "loss": 0.4007, "step": 13408 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9354638661219036e-06, "loss": 0.469, "step": 13409 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.934284663041889e-06, "loss": 0.4589, "step": 13410 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9331057808298483e-06, "loss": 0.3513, "step": 13411 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9319272195326777e-06, "loss": 0.3952, "step": 13412 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9307489791972633e-06, "loss": 0.3809, "step": 13413 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9295710598704787e-06, "loss": 0.4572, "step": 13414 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.928393461599183e-06, "loss": 0.4392, "step": 13415 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9272161844302226e-06, "loss": 0.4638, "step": 13416 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9260392284104314e-06, "loss": 0.3713, "step": 13417 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9248625935866293e-06, "loss": 0.4541, "step": 13418 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.923686280005629e-06, "loss": 0.4229, "step": 13419 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9225102877142266e-06, "loss": 0.398, "step": 13420 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9213346167591994e-06, "loss": 0.5097, "step": 13421 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9201592671873205e-06, "loss": 0.4185, "step": 13422 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.918984239045346e-06, "loss": 0.5371, "step": 13423 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.917809532380024e-06, "loss": 0.4065, "step": 13424 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.916635147238084e-06, "loss": 0.4641, "step": 13425 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9154610836662457e-06, "loss": 0.465, "step": 13426 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.914287341711215e-06, "loss": 0.3957, "step": 13427 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9131139214196857e-06, "loss": 0.4137, "step": 13428 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9119408228383385e-06, "loss": 0.361, "step": 13429 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.91076804601384e-06, "loss": 0.4944, "step": 13430 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9095955909928478e-06, "loss": 0.4278, "step": 13431 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9084234578220007e-06, "loss": 0.4542, "step": 13432 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.907251646547932e-06, "loss": 0.4686, "step": 13433 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9060801572172527e-06, "loss": 0.4501, "step": 13434 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.904908989876575e-06, "loss": 0.4329, "step": 13435 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9037381445724845e-06, "loss": 0.4514, "step": 13436 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9025676213515587e-06, "loss": 0.4152, "step": 13437 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9013974202603657e-06, "loss": 0.4565, "step": 13438 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.9002275413454563e-06, "loss": 0.3592, "step": 13439 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8990579846533686e-06, "loss": 0.4696, "step": 13440 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8978887502306343e-06, "loss": 0.422, "step": 13441 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.896719838123765e-06, "loss": 0.4462, "step": 13442 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8955512483792659e-06, "loss": 0.3789, "step": 13443 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8943829810436188e-06, "loss": 0.4454, "step": 13444 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8932150361632995e-06, "loss": 0.4325, "step": 13445 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8920474137847767e-06, "loss": 0.4877, "step": 13446 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8908801139544964e-06, "loss": 0.4708, "step": 13447 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8897131367188981e-06, "loss": 0.3809, "step": 13448 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8885464821244048e-06, "loss": 0.403, "step": 13449 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8873801502174271e-06, "loss": 0.44, "step": 13450 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8862141410443657e-06, "loss": 0.484, "step": 13451 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8850484546516057e-06, "loss": 0.4772, "step": 13452 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8838830910855188e-06, "loss": 0.4494, "step": 13453 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8827180503924668e-06, "loss": 0.4553, "step": 13454 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8815533326187963e-06, "loss": 0.4154, "step": 13455 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8803889378108397e-06, "loss": 0.4852, "step": 13456 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8792248660149225e-06, "loss": 0.4396, "step": 13457 }, { "epoch": 2.43, "grad_norm": 0.0, "learning_rate": 1.8780611172773556e-06, "loss": 0.4235, "step": 13458 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8768976916444281e-06, "loss": 0.3701, "step": 13459 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8757345891624257e-06, "loss": 0.3828, "step": 13460 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8745718098776167e-06, "loss": 0.3591, "step": 13461 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.873409353836263e-06, "loss": 0.3148, "step": 13462 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8722472210846064e-06, "loss": 0.4675, "step": 13463 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8710854116688804e-06, "loss": 0.4059, "step": 13464 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.869923925635304e-06, "loss": 0.3777, "step": 13465 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8687627630300764e-06, "loss": 0.4152, "step": 13466 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.867601923899398e-06, "loss": 0.4645, "step": 13467 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8664414082894467e-06, "loss": 0.4015, "step": 13468 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8652812162463907e-06, "loss": 0.4041, "step": 13469 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8641213478163821e-06, "loss": 0.4626, "step": 13470 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8629618030455643e-06, "loss": 0.4466, "step": 13471 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8618025819800633e-06, "loss": 0.4488, "step": 13472 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8606436846660013e-06, "loss": 0.4532, "step": 13473 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8594851111494762e-06, "loss": 0.473, "step": 13474 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.858326861476577e-06, "loss": 0.4292, "step": 13475 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8571689356933831e-06, "loss": 0.4813, "step": 13476 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8560113338459562e-06, "loss": 0.459, "step": 13477 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8548540559803529e-06, "loss": 0.4807, "step": 13478 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8536971021426075e-06, "loss": 0.3996, "step": 13479 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8525404723787477e-06, "loss": 0.3896, "step": 13480 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8513841667347877e-06, "loss": 0.4423, "step": 13481 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8502281852567195e-06, "loss": 0.441, "step": 13482 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8490725279905385e-06, "loss": 0.3186, "step": 13483 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.847917194982215e-06, "loss": 0.4321, "step": 13484 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8467621862777109e-06, "loss": 0.4901, "step": 13485 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8456075019229747e-06, "loss": 0.3673, "step": 13486 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8444531419639411e-06, "loss": 0.3865, "step": 13487 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8432991064465322e-06, "loss": 0.3864, "step": 13488 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.842145395416659e-06, "loss": 0.3932, "step": 13489 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.840992008920217e-06, "loss": 0.4729, "step": 13490 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8398389470030896e-06, "loss": 0.4626, "step": 13491 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.838686209711148e-06, "loss": 0.4173, "step": 13492 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8375337970902508e-06, "loss": 0.4354, "step": 13493 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.836381709186239e-06, "loss": 0.4406, "step": 13494 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8352299460449497e-06, "loss": 0.426, "step": 13495 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8340785077122037e-06, "loss": 0.4922, "step": 13496 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8329273942338011e-06, "loss": 0.4454, "step": 13497 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8317766056555376e-06, "loss": 0.4198, "step": 13498 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8306261420231908e-06, "loss": 0.4103, "step": 13499 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8294760033825344e-06, "loss": 0.4591, "step": 13500 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8283261897793192e-06, "loss": 0.3671, "step": 13501 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8271767012592867e-06, "loss": 0.3898, "step": 13502 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.826027537868168e-06, "loss": 0.4404, "step": 13503 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.824878699651672e-06, "loss": 0.449, "step": 13504 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.823730186655509e-06, "loss": 0.3785, "step": 13505 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8225819989253658e-06, "loss": 0.4605, "step": 13506 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8214341365069187e-06, "loss": 0.4671, "step": 13507 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.820286599445833e-06, "loss": 0.4404, "step": 13508 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8191393877877583e-06, "loss": 0.3986, "step": 13509 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8179925015783306e-06, "loss": 0.4659, "step": 13510 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8168459408631845e-06, "loss": 0.4715, "step": 13511 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8156997056879211e-06, "loss": 0.4006, "step": 13512 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8145537960981451e-06, "loss": 0.5341, "step": 13513 }, { "epoch": 2.44, "grad_norm": 0.0, "learning_rate": 1.8134082121394403e-06, "loss": 0.4009, "step": 13514 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8122629538573798e-06, "loss": 0.4435, "step": 13515 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8111180212975277e-06, "loss": 0.5009, "step": 13516 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.809973414505428e-06, "loss": 0.42, "step": 13517 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8088291335266206e-06, "loss": 0.4041, "step": 13518 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8076851784066185e-06, "loss": 0.4301, "step": 13519 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.806541549190932e-06, "loss": 0.4507, "step": 13520 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8053982459250618e-06, "loss": 0.3665, "step": 13521 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8042552686544867e-06, "loss": 0.4173, "step": 13522 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8031126174246783e-06, "loss": 0.4884, "step": 13523 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8019702922810911e-06, "loss": 0.4076, "step": 13524 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.8008282932691701e-06, "loss": 0.4228, "step": 13525 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7996866204343454e-06, "loss": 0.4293, "step": 13526 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7985452738220343e-06, "loss": 0.3349, "step": 13527 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7974042534776425e-06, "loss": 0.4837, "step": 13528 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7962635594465616e-06, "loss": 0.4303, "step": 13529 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7951231917741707e-06, "loss": 0.3882, "step": 13530 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7939831505058336e-06, "loss": 0.4156, "step": 13531 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.792843435686903e-06, "loss": 0.4228, "step": 13532 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7917040473627223e-06, "loss": 0.3879, "step": 13533 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7905649855786177e-06, "loss": 0.3986, "step": 13534 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7894262503798999e-06, "loss": 0.4463, "step": 13535 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7882878418118721e-06, "loss": 0.3931, "step": 13536 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7871497599198174e-06, "loss": 0.4423, "step": 13537 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7860120047490182e-06, "loss": 0.4466, "step": 13538 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7848745763447328e-06, "loss": 0.3649, "step": 13539 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7837374747522097e-06, "loss": 0.4408, "step": 13540 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.782600700016688e-06, "loss": 0.4132, "step": 13541 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7814642521833813e-06, "loss": 0.4, "step": 13542 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7803281312975095e-06, "loss": 0.3614, "step": 13543 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7791923374042652e-06, "loss": 0.4323, "step": 13544 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7780568705488333e-06, "loss": 0.422, "step": 13545 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7769217307763842e-06, "loss": 0.4743, "step": 13546 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7757869181320753e-06, "loss": 0.3999, "step": 13547 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7746524326610482e-06, "loss": 0.441, "step": 13548 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7735182744084444e-06, "loss": 0.3975, "step": 13549 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7723844434193738e-06, "loss": 0.3503, "step": 13550 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7712509397389444e-06, "loss": 0.4172, "step": 13551 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.770117763412249e-06, "loss": 0.465, "step": 13552 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7689849144843652e-06, "loss": 0.3242, "step": 13553 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7678523930003645e-06, "loss": 0.4697, "step": 13554 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7667201990052974e-06, "loss": 0.4845, "step": 13555 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.765588332544209e-06, "loss": 0.471, "step": 13556 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7644567936621194e-06, "loss": 0.3985, "step": 13557 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7633255824040453e-06, "loss": 0.4669, "step": 13558 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7621946988149919e-06, "loss": 0.4386, "step": 13559 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7610641429399456e-06, "loss": 0.4079, "step": 13560 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7599339148238815e-06, "loss": 0.396, "step": 13561 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7588040145117614e-06, "loss": 0.3783, "step": 13562 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.757674442048537e-06, "loss": 0.455, "step": 13563 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7565451974791426e-06, "loss": 0.3916, "step": 13564 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.755416280848502e-06, "loss": 0.4699, "step": 13565 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7542876922015262e-06, "loss": 0.3744, "step": 13566 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7531594315831114e-06, "loss": 0.4366, "step": 13567 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7520314990381416e-06, "loss": 0.3696, "step": 13568 }, { "epoch": 2.45, "grad_norm": 0.0, "learning_rate": 1.7509038946114853e-06, "loss": 0.4796, "step": 13569 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.749776618348007e-06, "loss": 0.4009, "step": 13570 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7486496702925504e-06, "loss": 0.4616, "step": 13571 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.747523050489943e-06, "loss": 0.4045, "step": 13572 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7463967589850062e-06, "loss": 0.4417, "step": 13573 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7452707958225423e-06, "loss": 0.4201, "step": 13574 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7441451610473504e-06, "loss": 0.4532, "step": 13575 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7430198547042066e-06, "loss": 0.4506, "step": 13576 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7418948768378785e-06, "loss": 0.39, "step": 13577 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7407702274931182e-06, "loss": 0.4294, "step": 13578 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7396459067146677e-06, "loss": 0.419, "step": 13579 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7385219145472543e-06, "loss": 0.4307, "step": 13580 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7373982510355902e-06, "loss": 0.4437, "step": 13581 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7362749162243797e-06, "loss": 0.3947, "step": 13582 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7351519101583092e-06, "loss": 0.3863, "step": 13583 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7340292328820541e-06, "loss": 0.3642, "step": 13584 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7329068844402775e-06, "loss": 0.3993, "step": 13585 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7317848648776236e-06, "loss": 0.3426, "step": 13586 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.730663174238737e-06, "loss": 0.4557, "step": 13587 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.729541812568234e-06, "loss": 0.3826, "step": 13588 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7284207799107255e-06, "loss": 0.3732, "step": 13589 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7273000763108071e-06, "loss": 0.4553, "step": 13590 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.726179701813062e-06, "loss": 0.324, "step": 13591 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7250596564620647e-06, "loss": 0.4022, "step": 13592 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7239399403023704e-06, "loss": 0.3803, "step": 13593 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7228205533785259e-06, "loss": 0.3858, "step": 13594 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7217014957350554e-06, "loss": 0.3755, "step": 13595 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7205827674164788e-06, "loss": 0.4141, "step": 13596 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7194643684673063e-06, "loss": 0.4882, "step": 13597 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7183462989320265e-06, "loss": 0.3869, "step": 13598 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7172285588551173e-06, "loss": 0.4574, "step": 13599 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7161111482810466e-06, "loss": 0.3777, "step": 13600 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7149940672542641e-06, "loss": 0.437, "step": 13601 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7138773158192113e-06, "loss": 0.4019, "step": 13602 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7127608940203143e-06, "loss": 0.4202, "step": 13603 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7116448019019849e-06, "loss": 0.512, "step": 13604 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7105290395086238e-06, "loss": 0.4041, "step": 13605 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.709413606884619e-06, "loss": 0.4805, "step": 13606 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7082985040743406e-06, "loss": 0.3776, "step": 13607 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7071837311221552e-06, "loss": 0.3756, "step": 13608 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7060692880724095e-06, "loss": 0.4527, "step": 13609 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7049551749694326e-06, "loss": 0.481, "step": 13610 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7038413918575503e-06, "loss": 0.4348, "step": 13611 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7027279387810657e-06, "loss": 0.3803, "step": 13612 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7016148157842816e-06, "loss": 0.4428, "step": 13613 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.7005020229114744e-06, "loss": 0.4097, "step": 13614 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6993895602069156e-06, "loss": 0.4183, "step": 13615 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6982774277148605e-06, "loss": 0.3238, "step": 13616 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.69716562547955e-06, "loss": 0.3962, "step": 13617 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6960541535452145e-06, "loss": 0.4377, "step": 13618 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6949430119560706e-06, "loss": 0.3737, "step": 13619 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.69383220075632e-06, "loss": 0.4106, "step": 13620 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6927217199901546e-06, "loss": 0.3394, "step": 13621 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.691611569701751e-06, "loss": 0.4296, "step": 13622 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6905017499352716e-06, "loss": 0.4029, "step": 13623 }, { "epoch": 2.46, "grad_norm": 0.0, "learning_rate": 1.6893922607348667e-06, "loss": 0.4566, "step": 13624 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6882831021446755e-06, "loss": 0.3626, "step": 13625 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6871742742088216e-06, "loss": 0.3571, "step": 13626 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.686065776971415e-06, "loss": 0.4317, "step": 13627 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6849576104765542e-06, "loss": 0.4983, "step": 13628 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6838497747683214e-06, "loss": 0.4619, "step": 13629 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6827422698907946e-06, "loss": 0.4582, "step": 13630 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.681635095888028e-06, "loss": 0.5415, "step": 13631 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.680528252804069e-06, "loss": 0.4165, "step": 13632 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6794217406829473e-06, "loss": 0.3402, "step": 13633 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6783155595686783e-06, "loss": 0.47, "step": 13634 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6772097095052763e-06, "loss": 0.4852, "step": 13635 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6761041905367293e-06, "loss": 0.3504, "step": 13636 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6749990027070174e-06, "loss": 0.5425, "step": 13637 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6738941460601054e-06, "loss": 0.4817, "step": 13638 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6727896206399496e-06, "loss": 0.3848, "step": 13639 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.671685426490487e-06, "loss": 0.3939, "step": 13640 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6705815636556455e-06, "loss": 0.45, "step": 13641 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.669478032179338e-06, "loss": 0.3373, "step": 13642 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.668374832105466e-06, "loss": 0.4653, "step": 13643 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6672719634779156e-06, "loss": 0.3848, "step": 13644 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6661694263405592e-06, "loss": 0.4311, "step": 13645 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.665067220737262e-06, "loss": 0.4009, "step": 13646 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.663965346711871e-06, "loss": 0.5572, "step": 13647 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.662863804308218e-06, "loss": 0.3938, "step": 13648 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6617625935701242e-06, "loss": 0.399, "step": 13649 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6606617145413962e-06, "loss": 0.4074, "step": 13650 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6595611672658341e-06, "loss": 0.4339, "step": 13651 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6584609517872163e-06, "loss": 0.4605, "step": 13652 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6573610681493114e-06, "loss": 0.3644, "step": 13653 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6562615163958751e-06, "loss": 0.4269, "step": 13654 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6551622965706493e-06, "loss": 0.482, "step": 13655 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6540634087173634e-06, "loss": 0.3872, "step": 13656 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6529648528797327e-06, "loss": 0.3138, "step": 13657 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6518666291014584e-06, "loss": 0.4157, "step": 13658 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6507687374262316e-06, "loss": 0.4798, "step": 13659 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.649671177897727e-06, "loss": 0.4586, "step": 13660 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6485739505596054e-06, "loss": 0.4503, "step": 13661 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6474770554555241e-06, "loss": 0.4388, "step": 13662 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6463804926291117e-06, "loss": 0.3908, "step": 13663 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6452842621239927e-06, "loss": 0.4498, "step": 13664 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6441883639837797e-06, "loss": 0.3959, "step": 13665 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.643092798252065e-06, "loss": 0.453, "step": 13666 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6419975649724374e-06, "loss": 0.3926, "step": 13667 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.640902664188464e-06, "loss": 0.4245, "step": 13668 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6398080959437035e-06, "loss": 0.4461, "step": 13669 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6387138602816998e-06, "loss": 0.4355, "step": 13670 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6376199572459795e-06, "loss": 0.4624, "step": 13671 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.636526386880064e-06, "loss": 0.3306, "step": 13672 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6354331492274566e-06, "loss": 0.3695, "step": 13673 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.634340244331648e-06, "loss": 0.3861, "step": 13674 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6332476722361145e-06, "loss": 0.342, "step": 13675 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6321554329843236e-06, "loss": 0.3908, "step": 13676 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6310635266197227e-06, "loss": 0.4159, "step": 13677 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6299719531857527e-06, "loss": 0.4695, "step": 13678 }, { "epoch": 2.47, "grad_norm": 0.0, "learning_rate": 1.6288807127258366e-06, "loss": 0.4709, "step": 13679 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6277898052833862e-06, "loss": 0.348, "step": 13680 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6266992309017993e-06, "loss": 0.3817, "step": 13681 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.625608989624462e-06, "loss": 0.3593, "step": 13682 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6245190814947432e-06, "loss": 0.391, "step": 13683 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6234295065560046e-06, "loss": 0.3535, "step": 13684 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6223402648515928e-06, "loss": 0.4483, "step": 13685 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6212513564248355e-06, "loss": 0.397, "step": 13686 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6201627813190523e-06, "loss": 0.4054, "step": 13687 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6190745395775464e-06, "loss": 0.3417, "step": 13688 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6179866312436144e-06, "loss": 0.3909, "step": 13689 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6168990563605346e-06, "loss": 0.4442, "step": 13690 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6158118149715706e-06, "loss": 0.4323, "step": 13691 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6147249071199767e-06, "loss": 0.5006, "step": 13692 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.613638332848989e-06, "loss": 0.4893, "step": 13693 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6125520922018368e-06, "loss": 0.4167, "step": 13694 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6114661852217295e-06, "loss": 0.3974, "step": 13695 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6103806119518682e-06, "loss": 0.4118, "step": 13696 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6092953724354388e-06, "loss": 0.4445, "step": 13697 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6082104667156128e-06, "loss": 0.3959, "step": 13698 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6071258948355484e-06, "loss": 0.4151, "step": 13699 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6060416568383986e-06, "loss": 0.4438, "step": 13700 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6049577527672888e-06, "loss": 0.4214, "step": 13701 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6038741826653414e-06, "loss": 0.3734, "step": 13702 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.602790946575662e-06, "loss": 0.4974, "step": 13703 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6017080445413413e-06, "loss": 0.4793, "step": 13704 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.6006254766054651e-06, "loss": 0.3742, "step": 13705 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5995432428110957e-06, "loss": 0.46, "step": 13706 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5984613432012862e-06, "loss": 0.4287, "step": 13707 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5973797778190802e-06, "loss": 0.3032, "step": 13708 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5962985467074964e-06, "loss": 0.5135, "step": 13709 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5952176499095552e-06, "loss": 0.4211, "step": 13710 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5941370874682539e-06, "loss": 0.4779, "step": 13711 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5930568594265795e-06, "loss": 0.4609, "step": 13712 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5919769658275052e-06, "loss": 0.4162, "step": 13713 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5908974067139903e-06, "loss": 0.3366, "step": 13714 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5898181821289827e-06, "loss": 0.424, "step": 13715 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5887392921154143e-06, "loss": 0.4736, "step": 13716 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5876607367162078e-06, "loss": 0.4577, "step": 13717 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5865825159742676e-06, "loss": 0.4989, "step": 13718 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5855046299324872e-06, "loss": 0.4382, "step": 13719 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.584427078633748e-06, "loss": 0.4285, "step": 13720 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5833498621209141e-06, "loss": 0.4125, "step": 13721 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5822729804368432e-06, "loss": 0.3655, "step": 13722 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5811964336243769e-06, "loss": 0.4661, "step": 13723 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5801202217263345e-06, "loss": 0.4328, "step": 13724 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5790443447855354e-06, "loss": 0.4517, "step": 13725 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5779688028447748e-06, "loss": 0.371, "step": 13726 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5768935959468457e-06, "loss": 0.3705, "step": 13727 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5758187241345192e-06, "loss": 0.3449, "step": 13728 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5747441874505542e-06, "loss": 0.4518, "step": 13729 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5736699859377025e-06, "loss": 0.5001, "step": 13730 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5725961196386885e-06, "loss": 0.4719, "step": 13731 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5715225885962393e-06, "loss": 0.421, "step": 13732 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5704493928530618e-06, "loss": 0.4141, "step": 13733 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5693765324518474e-06, "loss": 0.404, "step": 13734 }, { "epoch": 2.48, "grad_norm": 0.0, "learning_rate": 1.5683040074352762e-06, "loss": 0.3505, "step": 13735 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5672318178460166e-06, "loss": 0.4394, "step": 13736 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.56615996372672e-06, "loss": 0.4449, "step": 13737 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5650884451200322e-06, "loss": 0.4678, "step": 13738 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5640172620685723e-06, "loss": 0.4542, "step": 13739 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5629464146149587e-06, "loss": 0.3386, "step": 13740 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5618759028017894e-06, "loss": 0.4277, "step": 13741 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5608057266716503e-06, "loss": 0.406, "step": 13742 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5597358862671186e-06, "loss": 0.3548, "step": 13743 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5586663816307523e-06, "loss": 0.4239, "step": 13744 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5575972128050976e-06, "loss": 0.3777, "step": 13745 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5565283798326914e-06, "loss": 0.4209, "step": 13746 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5554598827560463e-06, "loss": 0.501, "step": 13747 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.554391721617675e-06, "loss": 0.4656, "step": 13748 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5533238964600694e-06, "loss": 0.4075, "step": 13749 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5522564073257097e-06, "loss": 0.4283, "step": 13750 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5511892542570606e-06, "loss": 0.4769, "step": 13751 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5501224372965773e-06, "loss": 0.4416, "step": 13752 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5490559564866992e-06, "loss": 0.4351, "step": 13753 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5479898118698523e-06, "loss": 0.4754, "step": 13754 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5469240034884493e-06, "loss": 0.3672, "step": 13755 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5458585313848906e-06, "loss": 0.3922, "step": 13756 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5447933956015627e-06, "loss": 0.3779, "step": 13757 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5437285961808357e-06, "loss": 0.45, "step": 13758 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5426641331650739e-06, "loss": 0.4396, "step": 13759 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5416000065966208e-06, "loss": 0.4519, "step": 13760 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5405362165178118e-06, "loss": 0.4067, "step": 13761 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5394727629709616e-06, "loss": 0.4496, "step": 13762 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5384096459983767e-06, "loss": 0.404, "step": 13763 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5373468656423529e-06, "loss": 0.4045, "step": 13764 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5362844219451688e-06, "loss": 0.4346, "step": 13765 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5352223149490896e-06, "loss": 0.3895, "step": 13766 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5341605446963681e-06, "loss": 0.4782, "step": 13767 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5330991112292415e-06, "loss": 0.4239, "step": 13768 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5320380145899383e-06, "loss": 0.4116, "step": 13769 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5309772548206692e-06, "loss": 0.3937, "step": 13770 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5299168319636327e-06, "loss": 0.3886, "step": 13771 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5288567460610138e-06, "loss": 0.4545, "step": 13772 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5277969971549867e-06, "loss": 0.4053, "step": 13773 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5267375852877087e-06, "loss": 0.3428, "step": 13774 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5256785105013216e-06, "loss": 0.3782, "step": 13775 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5246197728379664e-06, "loss": 0.4341, "step": 13776 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5235613723397536e-06, "loss": 0.4672, "step": 13777 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5225033090487896e-06, "loss": 0.448, "step": 13778 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5214455830071672e-06, "loss": 0.3446, "step": 13779 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.520388194256962e-06, "loss": 0.458, "step": 13780 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5193311428402424e-06, "loss": 0.3987, "step": 13781 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.518274428799058e-06, "loss": 0.4139, "step": 13782 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5172180521754499e-06, "loss": 0.4256, "step": 13783 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5161620130114374e-06, "loss": 0.4257, "step": 13784 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5151063113490305e-06, "loss": 0.4797, "step": 13785 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5140509472302323e-06, "loss": 0.3763, "step": 13786 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5129959206970247e-06, "loss": 0.3691, "step": 13787 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5119412317913795e-06, "loss": 0.3791, "step": 13788 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5108868805552513e-06, "loss": 0.408, "step": 13789 }, { "epoch": 2.49, "grad_norm": 0.0, "learning_rate": 1.5098328670305872e-06, "loss": 0.4289, "step": 13790 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.5087791912593152e-06, "loss": 0.5086, "step": 13791 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.5077258532833527e-06, "loss": 0.4228, "step": 13792 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.506672853144605e-06, "loss": 0.3971, "step": 13793 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.505620190884961e-06, "loss": 0.4021, "step": 13794 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.5045678665462971e-06, "loss": 0.4077, "step": 13795 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.5035158801704742e-06, "loss": 0.3982, "step": 13796 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.5024642317993477e-06, "loss": 0.4201, "step": 13797 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.5014129214747509e-06, "loss": 0.4166, "step": 13798 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.500361949238509e-06, "loss": 0.4032, "step": 13799 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.499311315132428e-06, "loss": 0.4102, "step": 13800 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4982610191983028e-06, "loss": 0.4273, "step": 13801 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4972110614779212e-06, "loss": 0.4265, "step": 13802 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4961614420130488e-06, "loss": 0.4271, "step": 13803 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4951121608454422e-06, "loss": 0.4326, "step": 13804 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4940632180168435e-06, "loss": 0.4064, "step": 13805 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4930146135689826e-06, "loss": 0.4041, "step": 13806 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.491966347543572e-06, "loss": 0.5015, "step": 13807 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4909184199823168e-06, "loss": 0.4581, "step": 13808 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4898708309269028e-06, "loss": 0.3146, "step": 13809 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4888235804190054e-06, "loss": 0.3829, "step": 13810 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.487776668500287e-06, "loss": 0.4193, "step": 13811 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4867300952123953e-06, "loss": 0.4149, "step": 13812 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4856838605969614e-06, "loss": 0.4333, "step": 13813 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4846379646956144e-06, "loss": 0.3747, "step": 13814 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.483592407549954e-06, "loss": 0.4026, "step": 13815 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4825471892015775e-06, "loss": 0.4227, "step": 13816 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4815023096920654e-06, "loss": 0.3719, "step": 13817 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4804577690629818e-06, "loss": 0.392, "step": 13818 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.479413567355885e-06, "loss": 0.4151, "step": 13819 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4783697046123124e-06, "loss": 0.4598, "step": 13820 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4773261808737949e-06, "loss": 0.433, "step": 13821 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4762829961818392e-06, "loss": 0.3801, "step": 13822 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4752401505779456e-06, "loss": 0.3317, "step": 13823 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4741976441036054e-06, "loss": 0.4471, "step": 13824 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4731554768002875e-06, "loss": 0.5051, "step": 13825 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4721136487094524e-06, "loss": 0.4231, "step": 13826 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4710721598725453e-06, "loss": 0.4044, "step": 13827 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.470031010330999e-06, "loss": 0.4928, "step": 13828 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4689902001262313e-06, "loss": 0.3902, "step": 13829 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.467949729299648e-06, "loss": 0.4136, "step": 13830 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4669095978926407e-06, "loss": 0.4718, "step": 13831 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.465869805946587e-06, "loss": 0.3904, "step": 13832 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.464830353502853e-06, "loss": 0.4204, "step": 13833 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4637912406027866e-06, "loss": 0.4589, "step": 13834 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4627524672877302e-06, "loss": 0.387, "step": 13835 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4617140335990087e-06, "loss": 0.4471, "step": 13836 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4606759395779269e-06, "loss": 0.441, "step": 13837 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4596381852657849e-06, "loss": 0.4666, "step": 13838 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.458600770703864e-06, "loss": 0.3571, "step": 13839 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4575636959334383e-06, "loss": 0.3833, "step": 13840 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4565269609957644e-06, "loss": 0.4588, "step": 13841 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4554905659320827e-06, "loss": 0.4531, "step": 13842 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4544545107836238e-06, "loss": 0.4315, "step": 13843 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4534187955916047e-06, "loss": 0.3806, "step": 13844 }, { "epoch": 2.5, "grad_norm": 0.0, "learning_rate": 1.4523834203972253e-06, "loss": 0.4582, "step": 13845 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4513483852416776e-06, "loss": 0.4872, "step": 13846 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4503136901661364e-06, "loss": 0.4182, "step": 13847 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4492793352117618e-06, "loss": 0.4526, "step": 13848 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4482453204197034e-06, "loss": 0.424, "step": 13849 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4472116458310948e-06, "loss": 0.3731, "step": 13850 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4461783114870598e-06, "loss": 0.4068, "step": 13851 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4451453174287089e-06, "loss": 0.4352, "step": 13852 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4441126636971292e-06, "loss": 0.3946, "step": 13853 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4430803503334045e-06, "loss": 0.4591, "step": 13854 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.442048377378601e-06, "loss": 0.4547, "step": 13855 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4410167448737744e-06, "loss": 0.4462, "step": 13856 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4399854528599656e-06, "loss": 0.3871, "step": 13857 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.438954501378199e-06, "loss": 0.4509, "step": 13858 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4379238904694903e-06, "loss": 0.4301, "step": 13859 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.436893620174833e-06, "loss": 0.4231, "step": 13860 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4358636905352186e-06, "loss": 0.4859, "step": 13861 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.434834101591618e-06, "loss": 0.4673, "step": 13862 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.433804853384989e-06, "loss": 0.3812, "step": 13863 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4327759459562785e-06, "loss": 0.3996, "step": 13864 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4317473793464176e-06, "loss": 0.3512, "step": 13865 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4307191535963227e-06, "loss": 0.4118, "step": 13866 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.429691268746901e-06, "loss": 0.4077, "step": 13867 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4286637248390423e-06, "loss": 0.4015, "step": 13868 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4276365219136245e-06, "loss": 0.467, "step": 13869 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4266096600115109e-06, "loss": 0.3939, "step": 13870 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4255831391735519e-06, "loss": 0.4387, "step": 13871 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4245569594405818e-06, "loss": 0.3972, "step": 13872 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4235311208534285e-06, "loss": 0.3786, "step": 13873 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4225056234529033e-06, "loss": 0.427, "step": 13874 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4214804672797943e-06, "loss": 0.3773, "step": 13875 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4204556523748892e-06, "loss": 0.4783, "step": 13876 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4194311787789539e-06, "loss": 0.4121, "step": 13877 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.418407046532747e-06, "loss": 0.4314, "step": 13878 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4173832556770096e-06, "loss": 0.4817, "step": 13879 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4163598062524685e-06, "loss": 0.4239, "step": 13880 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4153366982998395e-06, "loss": 0.4223, "step": 13881 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.414313931859822e-06, "loss": 0.3862, "step": 13882 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4132915069731057e-06, "loss": 0.4107, "step": 13883 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4122694236803624e-06, "loss": 0.4625, "step": 13884 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4112476820222531e-06, "loss": 0.4313, "step": 13885 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4102262820394242e-06, "loss": 0.3772, "step": 13886 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4092052237725084e-06, "loss": 0.4844, "step": 13887 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4081845072621247e-06, "loss": 0.431, "step": 13888 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4071641325488839e-06, "loss": 0.4349, "step": 13889 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.406144099673371e-06, "loss": 0.3678, "step": 13890 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4051244086761696e-06, "loss": 0.458, "step": 13891 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4041050595978423e-06, "loss": 0.4193, "step": 13892 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.403086052478938e-06, "loss": 0.4124, "step": 13893 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4020673873600011e-06, "loss": 0.3661, "step": 13894 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4010490642815532e-06, "loss": 0.356, "step": 13895 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.4000310832841035e-06, "loss": 0.3579, "step": 13896 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.3990134444081527e-06, "loss": 0.3471, "step": 13897 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.3979961476941762e-06, "loss": 0.3866, "step": 13898 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.3969791931826516e-06, "loss": 0.4342, "step": 13899 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.395962580914032e-06, "loss": 0.4044, "step": 13900 }, { "epoch": 2.51, "grad_norm": 0.0, "learning_rate": 1.3949463109287609e-06, "loss": 0.4057, "step": 13901 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3939303832672667e-06, "loss": 0.3596, "step": 13902 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3929147979699654e-06, "loss": 0.4564, "step": 13903 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3918995550772574e-06, "loss": 0.3655, "step": 13904 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.390884654629533e-06, "loss": 0.3741, "step": 13905 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3898700966671642e-06, "loss": 0.5205, "step": 13906 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3888558812305131e-06, "loss": 0.3943, "step": 13907 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3878420083599265e-06, "loss": 0.4082, "step": 13908 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3868284780957376e-06, "loss": 0.5016, "step": 13909 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3858152904782663e-06, "loss": 0.4296, "step": 13910 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3848024455478204e-06, "loss": 0.3911, "step": 13911 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3837899433446954e-06, "loss": 0.3936, "step": 13912 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3827777839091638e-06, "loss": 0.4536, "step": 13913 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3817659672814942e-06, "loss": 0.4075, "step": 13914 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3807544935019369e-06, "loss": 0.4221, "step": 13915 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3797433626107326e-06, "loss": 0.4716, "step": 13916 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.378732574648105e-06, "loss": 0.4998, "step": 13917 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3777221296542642e-06, "loss": 0.4298, "step": 13918 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.376712027669409e-06, "loss": 0.4727, "step": 13919 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3757022687337207e-06, "loss": 0.4415, "step": 13920 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3746928528873715e-06, "loss": 0.412, "step": 13921 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.373683780170516e-06, "loss": 0.3481, "step": 13922 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3726750506232967e-06, "loss": 0.501, "step": 13923 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3716666642858445e-06, "loss": 0.4411, "step": 13924 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.370658621198273e-06, "loss": 0.4625, "step": 13925 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3696509214006815e-06, "loss": 0.4355, "step": 13926 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3686435649331664e-06, "loss": 0.4365, "step": 13927 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.367636551835795e-06, "loss": 0.3829, "step": 13928 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.366629882148628e-06, "loss": 0.4404, "step": 13929 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3656235559117148e-06, "loss": 0.4333, "step": 13930 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3646175731650856e-06, "loss": 0.4387, "step": 13931 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3636119339487651e-06, "loss": 0.4324, "step": 13932 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.362606638302757e-06, "loss": 0.3987, "step": 13933 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3616016862670534e-06, "loss": 0.4688, "step": 13934 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3605970778816358e-06, "loss": 0.4717, "step": 13935 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3595928131864611e-06, "loss": 0.3974, "step": 13936 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3585888922214885e-06, "loss": 0.4407, "step": 13937 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3575853150266537e-06, "loss": 0.4139, "step": 13938 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3565820816418807e-06, "loss": 0.45, "step": 13939 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3555791921070782e-06, "loss": 0.4476, "step": 13940 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3545766464621457e-06, "loss": 0.4955, "step": 13941 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3535744447469635e-06, "loss": 0.4309, "step": 13942 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3525725870014029e-06, "loss": 0.4013, "step": 13943 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3515710732653175e-06, "loss": 0.4441, "step": 13944 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3505699035785513e-06, "loss": 0.4677, "step": 13945 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3495690779809312e-06, "loss": 0.4363, "step": 13946 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.348568596512273e-06, "loss": 0.4833, "step": 13947 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3475684592123739e-06, "loss": 0.42, "step": 13948 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3465686661210264e-06, "loss": 0.3943, "step": 13949 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3455692172780033e-06, "loss": 0.4093, "step": 13950 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3445701127230604e-06, "loss": 0.4833, "step": 13951 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.343571352495946e-06, "loss": 0.4442, "step": 13952 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3425729366363904e-06, "loss": 0.4651, "step": 13953 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3415748651841176e-06, "loss": 0.4173, "step": 13954 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.340577138178828e-06, "loss": 0.3027, "step": 13955 }, { "epoch": 2.52, "grad_norm": 0.0, "learning_rate": 1.3395797556602152e-06, "loss": 0.3355, "step": 13956 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3385827176679544e-06, "loss": 0.4349, "step": 13957 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3375860242417127e-06, "loss": 0.3763, "step": 13958 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3365896754211371e-06, "loss": 0.3984, "step": 13959 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3355936712458661e-06, "loss": 0.4372, "step": 13960 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3345980117555212e-06, "loss": 0.447, "step": 13961 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3336026969897132e-06, "loss": 0.3296, "step": 13962 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3326077269880356e-06, "loss": 0.5471, "step": 13963 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.331613101790068e-06, "loss": 0.4824, "step": 13964 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3306188214353866e-06, "loss": 0.4552, "step": 13965 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3296248859635374e-06, "loss": 0.3557, "step": 13966 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3286312954140623e-06, "loss": 0.4351, "step": 13967 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3276380498264908e-06, "loss": 0.4366, "step": 13968 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.326645149240331e-06, "loss": 0.4449, "step": 13969 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.325652593695087e-06, "loss": 0.3609, "step": 13970 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.324660383230244e-06, "loss": 0.4826, "step": 13971 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3236685178852726e-06, "loss": 0.4292, "step": 13972 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3226769976996322e-06, "loss": 0.517, "step": 13973 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3216858227127628e-06, "loss": 0.3382, "step": 13974 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3206949929640988e-06, "loss": 0.3972, "step": 13975 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3197045084930582e-06, "loss": 0.4161, "step": 13976 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3187143693390413e-06, "loss": 0.4059, "step": 13977 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3177245755414391e-06, "loss": 0.3881, "step": 13978 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3167351271396267e-06, "loss": 0.4234, "step": 13979 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3157460241729659e-06, "loss": 0.4152, "step": 13980 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3147572666808052e-06, "loss": 0.3792, "step": 13981 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3137688547024797e-06, "loss": 0.378, "step": 13982 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3127807882773092e-06, "loss": 0.4048, "step": 13983 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3117930674446e-06, "loss": 0.4684, "step": 13984 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3108056922436451e-06, "loss": 0.3949, "step": 13985 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3098186627137276e-06, "loss": 0.4177, "step": 13986 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3088319788941106e-06, "loss": 0.4296, "step": 13987 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.307845640824048e-06, "loss": 0.4897, "step": 13988 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3068596485427744e-06, "loss": 0.3563, "step": 13989 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3058740020895144e-06, "loss": 0.359, "step": 13990 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3048887015034827e-06, "loss": 0.4491, "step": 13991 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3039037468238735e-06, "loss": 0.4139, "step": 13992 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3029191380898698e-06, "loss": 0.4705, "step": 13993 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3019348753406425e-06, "loss": 0.4454, "step": 13994 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.3009509586153469e-06, "loss": 0.4238, "step": 13995 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2999673879531238e-06, "loss": 0.4247, "step": 13996 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.298984163393102e-06, "loss": 0.3715, "step": 13997 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.298001284974396e-06, "loss": 0.4351, "step": 13998 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2970187527361067e-06, "loss": 0.4545, "step": 13999 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2960365667173203e-06, "loss": 0.3748, "step": 14000 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2950547269571089e-06, "loss": 0.417, "step": 14001 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2940732334945315e-06, "loss": 0.4128, "step": 14002 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2930920863686402e-06, "loss": 0.3948, "step": 14003 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2921112856184582e-06, "loss": 0.3927, "step": 14004 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2911308312830061e-06, "loss": 0.447, "step": 14005 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.29015072340129e-06, "loss": 0.3987, "step": 14006 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2891709620122962e-06, "loss": 0.416, "step": 14007 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2881915471550065e-06, "loss": 0.442, "step": 14008 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2872124788683804e-06, "loss": 0.3474, "step": 14009 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2862337571913685e-06, "loss": 0.3929, "step": 14010 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 1.2852553821629066e-06, "loss": 0.3661, "step": 14011 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.284277353821911e-06, "loss": 0.3896, "step": 14012 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2832996722072954e-06, "loss": 0.4074, "step": 14013 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.282322337357952e-06, "loss": 0.3985, "step": 14014 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2813453493127593e-06, "loss": 0.4447, "step": 14015 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2803687081105842e-06, "loss": 0.4487, "step": 14016 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2793924137902801e-06, "loss": 0.3911, "step": 14017 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.278416466390685e-06, "loss": 0.4526, "step": 14018 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2774408659506233e-06, "loss": 0.3912, "step": 14019 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.276465612508907e-06, "loss": 0.409, "step": 14020 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2754907061043331e-06, "loss": 0.4338, "step": 14021 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.274516146775684e-06, "loss": 0.4068, "step": 14022 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2735419345617284e-06, "loss": 0.3931, "step": 14023 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2725680695012267e-06, "loss": 0.4098, "step": 14024 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2715945516329164e-06, "loss": 0.3612, "step": 14025 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2706213809955314e-06, "loss": 0.3751, "step": 14026 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2696485576277795e-06, "loss": 0.4208, "step": 14027 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2686760815683608e-06, "loss": 0.3993, "step": 14028 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2677039528559675e-06, "loss": 0.3488, "step": 14029 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2667321715292713e-06, "loss": 0.3523, "step": 14030 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.26576073762693e-06, "loss": 0.3192, "step": 14031 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.264789651187588e-06, "loss": 0.4047, "step": 14032 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.263818912249879e-06, "loss": 0.4385, "step": 14033 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2628485208524188e-06, "loss": 0.3869, "step": 14034 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2618784770338132e-06, "loss": 0.356, "step": 14035 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2609087808326503e-06, "loss": 0.3741, "step": 14036 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.259939432287507e-06, "loss": 0.414, "step": 14037 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2589704314369455e-06, "loss": 0.5094, "step": 14038 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2580017783195141e-06, "loss": 0.4148, "step": 14039 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2570334729737466e-06, "loss": 0.4258, "step": 14040 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2560655154381685e-06, "loss": 0.4158, "step": 14041 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2550979057512824e-06, "loss": 0.4215, "step": 14042 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.254130643951581e-06, "loss": 0.4381, "step": 14043 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2531637300775468e-06, "loss": 0.3925, "step": 14044 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.25219716416764e-06, "loss": 0.4329, "step": 14045 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2512309462603178e-06, "loss": 0.4513, "step": 14046 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2502650763940171e-06, "loss": 0.4466, "step": 14047 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2492995546071629e-06, "loss": 0.406, "step": 14048 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2483343809381599e-06, "loss": 0.4176, "step": 14049 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.247369555425406e-06, "loss": 0.3799, "step": 14050 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2464050781072878e-06, "loss": 0.4257, "step": 14051 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.245440949022172e-06, "loss": 0.3924, "step": 14052 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.244477168208411e-06, "loss": 0.5103, "step": 14053 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2435137357043492e-06, "loss": 0.4232, "step": 14054 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2425506515483098e-06, "loss": 0.4804, "step": 14055 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2415879157786092e-06, "loss": 0.4638, "step": 14056 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2406255284335444e-06, "loss": 0.4762, "step": 14057 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2396634895514025e-06, "loss": 0.3891, "step": 14058 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.238701799170453e-06, "loss": 0.3791, "step": 14059 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.2377404573289564e-06, "loss": 0.428, "step": 14060 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.236779464065152e-06, "loss": 0.4118, "step": 14061 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.235818819417275e-06, "loss": 0.3832, "step": 14062 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.23485852342354e-06, "loss": 0.4015, "step": 14063 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.233898576122149e-06, "loss": 0.4441, "step": 14064 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.232938977551289e-06, "loss": 0.4396, "step": 14065 }, { "epoch": 2.54, "grad_norm": 0.0, "learning_rate": 1.231979727749133e-06, "loss": 0.4313, "step": 14066 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2310208267538448e-06, "loss": 0.4765, "step": 14067 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2300622746035718e-06, "loss": 0.4046, "step": 14068 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2291040713364443e-06, "loss": 0.379, "step": 14069 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2281462169905822e-06, "loss": 0.3892, "step": 14070 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2271887116040915e-06, "loss": 0.4372, "step": 14071 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2262315552150617e-06, "loss": 0.3677, "step": 14072 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2252747478615711e-06, "loss": 0.4708, "step": 14073 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.224318289581683e-06, "loss": 0.4085, "step": 14074 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2233621804134466e-06, "loss": 0.3439, "step": 14075 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2224064203948981e-06, "loss": 0.4102, "step": 14076 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2214510095640576e-06, "loss": 0.3795, "step": 14077 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2204959479589362e-06, "loss": 0.4286, "step": 14078 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2195412356175274e-06, "loss": 0.4356, "step": 14079 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2185868725778083e-06, "loss": 0.475, "step": 14080 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2176328588777476e-06, "loss": 0.436, "step": 14081 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2166791945552937e-06, "loss": 0.3145, "step": 14082 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2157258796483906e-06, "loss": 0.4364, "step": 14083 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2147729141949605e-06, "loss": 0.3731, "step": 14084 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.213820298232914e-06, "loss": 0.4446, "step": 14085 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2128680318001495e-06, "loss": 0.3981, "step": 14086 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2119161149345427e-06, "loss": 0.3415, "step": 14087 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2109645476739707e-06, "loss": 0.4826, "step": 14088 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2100133300562855e-06, "loss": 0.4253, "step": 14089 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.209062462119328e-06, "loss": 0.4891, "step": 14090 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.208111943900926e-06, "loss": 0.5085, "step": 14091 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.207161775438891e-06, "loss": 0.4439, "step": 14092 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.206211956771024e-06, "loss": 0.4199, "step": 14093 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2052624879351105e-06, "loss": 0.4356, "step": 14094 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2043133689689213e-06, "loss": 0.4755, "step": 14095 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2033645999102127e-06, "loss": 0.3828, "step": 14096 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2024161807967316e-06, "loss": 0.4452, "step": 14097 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.2014681116662053e-06, "loss": 0.457, "step": 14098 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.200520392556348e-06, "loss": 0.5079, "step": 14099 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1995730235048664e-06, "loss": 0.3914, "step": 14100 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.198626004549448e-06, "loss": 0.44, "step": 14101 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1976793357277617e-06, "loss": 0.3874, "step": 14102 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1967330170774717e-06, "loss": 0.3845, "step": 14103 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1957870486362199e-06, "loss": 0.4464, "step": 14104 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1948414304416444e-06, "loss": 0.4216, "step": 14105 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1938961625313617e-06, "loss": 0.3771, "step": 14106 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1929512449429736e-06, "loss": 0.4646, "step": 14107 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1920066777140737e-06, "loss": 0.4308, "step": 14108 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1910624608822364e-06, "loss": 0.3306, "step": 14109 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1901185944850258e-06, "loss": 0.4004, "step": 14110 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1891750785599887e-06, "loss": 0.4089, "step": 14111 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1882319131446617e-06, "loss": 0.3299, "step": 14112 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.187289098276564e-06, "loss": 0.3639, "step": 14113 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1863466339932027e-06, "loss": 0.3867, "step": 14114 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1854045203320697e-06, "loss": 0.4458, "step": 14115 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1844627573306467e-06, "loss": 0.5058, "step": 14116 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1835213450263982e-06, "loss": 0.4384, "step": 14117 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1825802834567734e-06, "loss": 0.3583, "step": 14118 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.18163957265921e-06, "loss": 0.4374, "step": 14119 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.180699212671128e-06, "loss": 0.4104, "step": 14120 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1797592035299422e-06, "loss": 0.3878, "step": 14121 }, { "epoch": 2.55, "grad_norm": 0.0, "learning_rate": 1.1788195452730456e-06, "loss": 0.4557, "step": 14122 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1778802379378185e-06, "loss": 0.421, "step": 14123 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.176941281561631e-06, "loss": 0.4309, "step": 14124 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.17600267618183e-06, "loss": 0.4496, "step": 14125 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1750644218357598e-06, "loss": 0.3969, "step": 14126 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1741265185607464e-06, "loss": 0.4392, "step": 14127 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1731889663940986e-06, "loss": 0.355, "step": 14128 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1722517653731157e-06, "loss": 0.4947, "step": 14129 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.17131491553508e-06, "loss": 0.5066, "step": 14130 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1703784169172616e-06, "loss": 0.3568, "step": 14131 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1694422695569152e-06, "loss": 0.4231, "step": 14132 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1685064734912831e-06, "loss": 0.4389, "step": 14133 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1675710287575926e-06, "loss": 0.405, "step": 14134 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1666359353930578e-06, "loss": 0.4673, "step": 14135 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1657011934348772e-06, "loss": 0.4616, "step": 14136 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1647668029202353e-06, "loss": 0.4947, "step": 14137 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1638327638863079e-06, "loss": 0.4056, "step": 14138 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1628990763702518e-06, "loss": 0.3653, "step": 14139 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1619657404092078e-06, "loss": 0.4191, "step": 14140 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1610327560403067e-06, "loss": 0.492, "step": 14141 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1601001233006637e-06, "loss": 0.2958, "step": 14142 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1591678422273823e-06, "loss": 0.3966, "step": 14143 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1582359128575493e-06, "loss": 0.4673, "step": 14144 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1573043352282386e-06, "loss": 0.4102, "step": 14145 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1563731093765096e-06, "loss": 0.3663, "step": 14146 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.155442235339409e-06, "loss": 0.3654, "step": 14147 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1545117131539675e-06, "loss": 0.3899, "step": 14148 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1535815428572029e-06, "loss": 0.4596, "step": 14149 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.152651724486119e-06, "loss": 0.4755, "step": 14150 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1517222580777066e-06, "loss": 0.4807, "step": 14151 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.15079314366894e-06, "loss": 0.391, "step": 14152 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1498643812967803e-06, "loss": 0.3454, "step": 14153 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1489359709981806e-06, "loss": 0.3847, "step": 14154 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1480079128100686e-06, "loss": 0.4322, "step": 14155 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1470802067693653e-06, "loss": 0.3971, "step": 14156 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1461528529129785e-06, "loss": 0.4583, "step": 14157 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1452258512777958e-06, "loss": 0.3377, "step": 14158 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1442992019007005e-06, "loss": 0.4116, "step": 14159 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1433729048185537e-06, "loss": 0.4139, "step": 14160 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1424469600682043e-06, "loss": 0.4678, "step": 14161 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.141521367686491e-06, "loss": 0.3799, "step": 14162 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1405961277102295e-06, "loss": 0.3046, "step": 14163 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1396712401762321e-06, "loss": 0.5065, "step": 14164 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1387467051212919e-06, "loss": 0.3834, "step": 14165 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.137822522582188e-06, "loss": 0.4608, "step": 14166 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.136898692595686e-06, "loss": 0.4536, "step": 14167 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.135975215198537e-06, "loss": 0.4685, "step": 14168 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1350520904274764e-06, "loss": 0.4468, "step": 14169 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1341293183192348e-06, "loss": 0.4805, "step": 14170 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.133206898910515e-06, "loss": 0.4178, "step": 14171 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1322848322380141e-06, "loss": 0.4269, "step": 14172 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1313631183384144e-06, "loss": 0.3911, "step": 14173 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1304417572483805e-06, "loss": 0.4046, "step": 14174 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.12952074900457e-06, "loss": 0.3424, "step": 14175 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1286000936436192e-06, "loss": 0.4456, "step": 14176 }, { "epoch": 2.56, "grad_norm": 0.0, "learning_rate": 1.1276797912021576e-06, "loss": 0.3491, "step": 14177 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1267598417167903e-06, "loss": 0.4595, "step": 14178 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1258402452241146e-06, "loss": 0.4079, "step": 14179 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1249210017607193e-06, "loss": 0.4417, "step": 14180 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1240021113631695e-06, "loss": 0.3677, "step": 14181 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1230835740680212e-06, "loss": 0.3801, "step": 14182 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1221653899118157e-06, "loss": 0.3249, "step": 14183 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.12124755893108e-06, "loss": 0.4574, "step": 14184 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1203300811623263e-06, "loss": 0.4904, "step": 14185 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1194129566420531e-06, "loss": 0.476, "step": 14186 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1184961854067467e-06, "loss": 0.3953, "step": 14187 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1175797674928756e-06, "loss": 0.3972, "step": 14188 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1166637029368988e-06, "loss": 0.4678, "step": 14189 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1157479917752578e-06, "loss": 0.411, "step": 14190 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1148326340443793e-06, "loss": 0.3573, "step": 14191 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.113917629780683e-06, "loss": 0.3142, "step": 14192 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1130029790205654e-06, "loss": 0.4642, "step": 14193 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1120886818004117e-06, "loss": 0.4107, "step": 14194 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1111747381565974e-06, "loss": 0.4545, "step": 14195 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1102611481254776e-06, "loss": 0.4492, "step": 14196 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1093479117434003e-06, "loss": 0.3696, "step": 14197 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1084350290466929e-06, "loss": 0.3973, "step": 14198 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1075225000716716e-06, "loss": 0.3931, "step": 14199 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1066103248546424e-06, "loss": 0.4601, "step": 14200 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1056985034318846e-06, "loss": 0.4461, "step": 14201 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1047870358396795e-06, "loss": 0.4991, "step": 14202 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1038759221142847e-06, "loss": 0.418, "step": 14203 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1029651622919457e-06, "loss": 0.4008, "step": 14204 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1020547564088935e-06, "loss": 0.3581, "step": 14205 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1011447045013468e-06, "loss": 0.3788, "step": 14206 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.1002350066055079e-06, "loss": 0.4135, "step": 14207 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0993256627575665e-06, "loss": 0.3403, "step": 14208 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0984166729936974e-06, "loss": 0.4339, "step": 14209 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0975080373500636e-06, "loss": 0.5134, "step": 14210 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0965997558628094e-06, "loss": 0.4591, "step": 14211 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0956918285680685e-06, "loss": 0.4522, "step": 14212 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0947842555019617e-06, "loss": 0.4399, "step": 14213 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0938770367005936e-06, "loss": 0.4361, "step": 14214 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0929701722000552e-06, "loss": 0.4067, "step": 14215 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0920636620364211e-06, "loss": 0.4167, "step": 14216 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0911575062457514e-06, "loss": 0.4734, "step": 14217 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0902517048641003e-06, "loss": 0.3746, "step": 14218 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0893462579274995e-06, "loss": 0.5064, "step": 14219 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0884411654719695e-06, "loss": 0.4748, "step": 14220 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0875364275335165e-06, "loss": 0.3628, "step": 14221 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0866320441481315e-06, "loss": 0.3503, "step": 14222 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0857280153517935e-06, "loss": 0.4799, "step": 14223 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.084824341180467e-06, "loss": 0.4318, "step": 14224 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0839210216701e-06, "loss": 0.3891, "step": 14225 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0830180568566285e-06, "loss": 0.3902, "step": 14226 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0821154467759752e-06, "loss": 0.3423, "step": 14227 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0812131914640455e-06, "loss": 0.4137, "step": 14228 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0803112909567325e-06, "loss": 0.4044, "step": 14229 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.079409745289921e-06, "loss": 0.4571, "step": 14230 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.078508554499469e-06, "loss": 0.3851, "step": 14231 }, { "epoch": 2.57, "grad_norm": 0.0, "learning_rate": 1.0776077186212308e-06, "loss": 0.4545, "step": 14232 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0767072376910416e-06, "loss": 0.4078, "step": 14233 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0758071117447232e-06, "loss": 0.339, "step": 14234 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0749073408180877e-06, "loss": 0.4911, "step": 14235 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0740079249469282e-06, "loss": 0.4179, "step": 14236 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.073108864167024e-06, "loss": 0.4767, "step": 14237 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0722101585141442e-06, "loss": 0.383, "step": 14238 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0713118080240348e-06, "loss": 0.4804, "step": 14239 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0704138127324392e-06, "loss": 0.4033, "step": 14240 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0695161726750792e-06, "loss": 0.3918, "step": 14241 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0686188878876635e-06, "loss": 0.4089, "step": 14242 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0677219584058895e-06, "loss": 0.3938, "step": 14243 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0668253842654386e-06, "loss": 0.4603, "step": 14244 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0659291655019766e-06, "loss": 0.4705, "step": 14245 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0650333021511571e-06, "loss": 0.4007, "step": 14246 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0641377942486198e-06, "loss": 0.4737, "step": 14247 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.063242641829989e-06, "loss": 0.3719, "step": 14248 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0623478449308755e-06, "loss": 0.407, "step": 14249 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.061453403586874e-06, "loss": 0.3308, "step": 14250 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0605593178335693e-06, "loss": 0.4827, "step": 14251 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.059665587706531e-06, "loss": 0.4278, "step": 14252 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0587722132413115e-06, "loss": 0.437, "step": 14253 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.057879194473449e-06, "loss": 0.3557, "step": 14254 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0569865314384697e-06, "loss": 0.3812, "step": 14255 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0560942241718875e-06, "loss": 0.3899, "step": 14256 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0552022727091994e-06, "loss": 0.4178, "step": 14257 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.054310677085888e-06, "loss": 0.3836, "step": 14258 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0534194373374228e-06, "loss": 0.4157, "step": 14259 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.052528553499258e-06, "loss": 0.4301, "step": 14260 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.051638025606836e-06, "loss": 0.4206, "step": 14261 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0507478536955828e-06, "loss": 0.4565, "step": 14262 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0498580378009103e-06, "loss": 0.3988, "step": 14263 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0489685779582182e-06, "loss": 0.4474, "step": 14264 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0480794742028888e-06, "loss": 0.446, "step": 14265 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.047190726570293e-06, "loss": 0.4208, "step": 14266 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.046302335095788e-06, "loss": 0.4301, "step": 14267 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0454142998147166e-06, "loss": 0.4358, "step": 14268 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0445266207624028e-06, "loss": 0.4393, "step": 14269 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0436392979741616e-06, "loss": 0.3852, "step": 14270 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0427523314852906e-06, "loss": 0.4543, "step": 14271 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0418657213310778e-06, "loss": 0.4081, "step": 14272 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0409794675467932e-06, "loss": 0.3749, "step": 14273 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0400935701676928e-06, "loss": 0.3897, "step": 14274 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0392080292290197e-06, "loss": 0.3491, "step": 14275 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0383228447660021e-06, "loss": 0.3816, "step": 14276 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0374380168138532e-06, "loss": 0.4293, "step": 14277 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0365535454077736e-06, "loss": 0.4711, "step": 14278 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0356694305829496e-06, "loss": 0.3718, "step": 14279 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.034785672374552e-06, "loss": 0.3818, "step": 14280 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0339022708177393e-06, "loss": 0.4306, "step": 14281 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0330192259476535e-06, "loss": 0.3829, "step": 14282 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.032136537799424e-06, "loss": 0.553, "step": 14283 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0312542064081666e-06, "loss": 0.4268, "step": 14284 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0303722318089816e-06, "loss": 0.3603, "step": 14285 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0294906140369542e-06, "loss": 0.411, "step": 14286 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 1.0286093531271567e-06, "loss": 0.3699, "step": 14287 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0277284491146477e-06, "loss": 0.4416, "step": 14288 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0268479020344723e-06, "loss": 0.4271, "step": 14289 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0259677119216594e-06, "loss": 0.4356, "step": 14290 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0250878788112261e-06, "loss": 0.4357, "step": 14291 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0242084027381704e-06, "loss": 0.4531, "step": 14292 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0233292837374775e-06, "loss": 0.4663, "step": 14293 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0224505218441271e-06, "loss": 0.4904, "step": 14294 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0215721170930737e-06, "loss": 0.4033, "step": 14295 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0206940695192623e-06, "loss": 0.5159, "step": 14296 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0198163791576233e-06, "loss": 0.4115, "step": 14297 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0189390460430725e-06, "loss": 0.442, "step": 14298 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0180620702105126e-06, "loss": 0.4396, "step": 14299 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0171854516948298e-06, "loss": 0.4074, "step": 14300 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0163091905308987e-06, "loss": 0.396, "step": 14301 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0154332867535776e-06, "loss": 0.4199, "step": 14302 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0145577403977114e-06, "loss": 0.3822, "step": 14303 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0136825514981298e-06, "loss": 0.3663, "step": 14304 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0128077200896524e-06, "loss": 0.4605, "step": 14305 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0119332462070819e-06, "loss": 0.335, "step": 14306 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0110591298852013e-06, "loss": 0.4223, "step": 14307 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0101853711587882e-06, "loss": 0.4585, "step": 14308 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0093119700625998e-06, "loss": 0.3594, "step": 14309 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0084389266313855e-06, "loss": 0.3919, "step": 14310 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0075662408998732e-06, "loss": 0.4422, "step": 14311 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0066939129027808e-06, "loss": 0.4345, "step": 14312 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0058219426748116e-06, "loss": 0.3944, "step": 14313 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0049503302506524e-06, "loss": 0.4766, "step": 14314 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0040790756649798e-06, "loss": 0.4188, "step": 14315 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0032081789524517e-06, "loss": 0.4082, "step": 14316 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0023376401477135e-06, "loss": 0.3754, "step": 14317 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.001467459285399e-06, "loss": 0.4115, "step": 14318 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 1.0005976364001247e-06, "loss": 0.4372, "step": 14319 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.997281715264928e-07, "loss": 0.4127, "step": 14320 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.988590646990925e-07, "loss": 0.3221, "step": 14321 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.979903159524984e-07, "loss": 0.4001, "step": 14322 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.971219253212705e-07, "loss": 0.5143, "step": 14323 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.962538928399557e-07, "loss": 0.3513, "step": 14324 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.953862185430851e-07, "loss": 0.3534, "step": 14325 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.945189024651747e-07, "loss": 0.4163, "step": 14326 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.936519446407312e-07, "loss": 0.3513, "step": 14327 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.927853451042424e-07, "loss": 0.4228, "step": 14328 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.919191038901843e-07, "loss": 0.413, "step": 14329 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.910532210330148e-07, "loss": 0.4127, "step": 14330 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.901876965671787e-07, "loss": 0.481, "step": 14331 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.893225305271126e-07, "loss": 0.3537, "step": 14332 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.884577229472326e-07, "loss": 0.4674, "step": 14333 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.875932738619421e-07, "loss": 0.4611, "step": 14334 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.867291833056303e-07, "loss": 0.4037, "step": 14335 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.85865451312672e-07, "loss": 0.486, "step": 14336 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.850020779174285e-07, "loss": 0.3689, "step": 14337 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.841390631542457e-07, "loss": 0.4205, "step": 14338 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.832764070574551e-07, "loss": 0.3563, "step": 14339 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.82414109661377e-07, "loss": 0.4067, "step": 14340 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.815521710003129e-07, "loss": 0.4504, "step": 14341 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.806905911085507e-07, "loss": 0.4243, "step": 14342 }, { "epoch": 2.59, "grad_norm": 0.0, "learning_rate": 9.798293700203698e-07, "loss": 0.426, "step": 14343 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.789685077700318e-07, "loss": 0.46, "step": 14344 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.78108004391778e-07, "loss": 0.4787, "step": 14345 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.77247859919842e-07, "loss": 0.4309, "step": 14346 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.763880743884423e-07, "loss": 0.3373, "step": 14347 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.75528647831785e-07, "loss": 0.4421, "step": 14348 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.746695802840577e-07, "loss": 0.4147, "step": 14349 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.73810871779436e-07, "loss": 0.4892, "step": 14350 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.72952522352082e-07, "loss": 0.4271, "step": 14351 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.720945320361374e-07, "loss": 0.3777, "step": 14352 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.712369008657396e-07, "loss": 0.3961, "step": 14353 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.703796288750056e-07, "loss": 0.4397, "step": 14354 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.695227160980382e-07, "loss": 0.4071, "step": 14355 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.686661625689264e-07, "loss": 0.3645, "step": 14356 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.678099683217468e-07, "loss": 0.4226, "step": 14357 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.669541333905597e-07, "loss": 0.4206, "step": 14358 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.660986578094122e-07, "loss": 0.3957, "step": 14359 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.652435416123351e-07, "loss": 0.4224, "step": 14360 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.643887848333477e-07, "loss": 0.4433, "step": 14361 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.63534387506454e-07, "loss": 0.5089, "step": 14362 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.626803496656422e-07, "loss": 0.4966, "step": 14363 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.618266713448855e-07, "loss": 0.3983, "step": 14364 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.609733525781494e-07, "loss": 0.4545, "step": 14365 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.601203933993774e-07, "loss": 0.353, "step": 14366 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.592677938425054e-07, "loss": 0.4255, "step": 14367 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.584155539414453e-07, "loss": 0.4337, "step": 14368 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.57563673730102e-07, "loss": 0.3822, "step": 14369 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.567121532423683e-07, "loss": 0.378, "step": 14370 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.558609925121176e-07, "loss": 0.4901, "step": 14371 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.55010191573209e-07, "loss": 0.4063, "step": 14372 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.541597504594902e-07, "loss": 0.4944, "step": 14373 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.533096692047938e-07, "loss": 0.4215, "step": 14374 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.524599478429353e-07, "loss": 0.4477, "step": 14375 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.516105864077208e-07, "loss": 0.3783, "step": 14376 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.507615849329365e-07, "loss": 0.425, "step": 14377 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.49912943452359e-07, "loss": 0.3772, "step": 14378 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.49064661999749e-07, "loss": 0.4506, "step": 14379 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.482167406088494e-07, "loss": 0.4203, "step": 14380 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.473691793133966e-07, "loss": 0.3679, "step": 14381 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.46521978147108e-07, "loss": 0.3704, "step": 14382 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.456751371436812e-07, "loss": 0.4441, "step": 14383 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.448286563368103e-07, "loss": 0.3685, "step": 14384 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.439825357601651e-07, "loss": 0.5149, "step": 14385 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.431367754474097e-07, "loss": 0.5133, "step": 14386 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.422913754321894e-07, "loss": 0.4244, "step": 14387 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.414463357481341e-07, "loss": 0.4274, "step": 14388 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.406016564288644e-07, "loss": 0.4394, "step": 14389 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.397573375079771e-07, "loss": 0.4534, "step": 14390 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.389133790190652e-07, "loss": 0.3876, "step": 14391 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.380697809957007e-07, "loss": 0.432, "step": 14392 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.372265434714456e-07, "loss": 0.4651, "step": 14393 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.363836664798431e-07, "loss": 0.4527, "step": 14394 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.355411500544265e-07, "loss": 0.3467, "step": 14395 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.346989942287077e-07, "loss": 0.4116, "step": 14396 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.338571990361978e-07, "loss": 0.3606, "step": 14397 }, { "epoch": 2.6, "grad_norm": 0.0, "learning_rate": 9.330157645103765e-07, "loss": 0.3892, "step": 14398 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.321746906847218e-07, "loss": 0.389, "step": 14399 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.313339775926911e-07, "loss": 0.3746, "step": 14400 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.304936252677288e-07, "loss": 0.4829, "step": 14401 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.296536337432693e-07, "loss": 0.3913, "step": 14402 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.28814003052726e-07, "loss": 0.4076, "step": 14403 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.27974733229503e-07, "loss": 0.4426, "step": 14404 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.271358243069861e-07, "loss": 0.424, "step": 14405 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.262972763185452e-07, "loss": 0.413, "step": 14406 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.254590892975456e-07, "loss": 0.3405, "step": 14407 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.246212632773288e-07, "loss": 0.3783, "step": 14408 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.237837982912246e-07, "loss": 0.4181, "step": 14409 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.229466943725496e-07, "loss": 0.3756, "step": 14410 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.22109951554605e-07, "loss": 0.442, "step": 14411 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.212735698706776e-07, "loss": 0.4507, "step": 14412 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.204375493540408e-07, "loss": 0.4215, "step": 14413 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.196018900379522e-07, "loss": 0.3594, "step": 14414 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.187665919556565e-07, "loss": 0.4745, "step": 14415 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.179316551403816e-07, "loss": 0.4124, "step": 14416 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.170970796253453e-07, "loss": 0.4073, "step": 14417 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.162628654437445e-07, "loss": 0.4402, "step": 14418 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.154290126287702e-07, "loss": 0.3715, "step": 14419 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.145955212135937e-07, "loss": 0.4528, "step": 14420 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.137623912313698e-07, "loss": 0.4274, "step": 14421 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.12929622715244e-07, "loss": 0.4663, "step": 14422 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.120972156983432e-07, "loss": 0.4426, "step": 14423 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.112651702137853e-07, "loss": 0.4522, "step": 14424 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.104334862946684e-07, "loss": 0.353, "step": 14425 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.096021639740793e-07, "loss": 0.4139, "step": 14426 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.087712032850904e-07, "loss": 0.3529, "step": 14427 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.07940604260753e-07, "loss": 0.3434, "step": 14428 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.071103669341164e-07, "loss": 0.3791, "step": 14429 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.062804913382061e-07, "loss": 0.4307, "step": 14430 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.054509775060372e-07, "loss": 0.3929, "step": 14431 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.046218254706074e-07, "loss": 0.3319, "step": 14432 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.037930352649027e-07, "loss": 0.4335, "step": 14433 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.029646069218912e-07, "loss": 0.3867, "step": 14434 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.021365404745363e-07, "loss": 0.4209, "step": 14435 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.013088359557732e-07, "loss": 0.4111, "step": 14436 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 9.004814933985317e-07, "loss": 0.4249, "step": 14437 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.996545128357248e-07, "loss": 0.4453, "step": 14438 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.988278943002482e-07, "loss": 0.4382, "step": 14439 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.980016378249922e-07, "loss": 0.4267, "step": 14440 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.97175743442823e-07, "loss": 0.3942, "step": 14441 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.963502111865996e-07, "loss": 0.3492, "step": 14442 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.955250410891569e-07, "loss": 0.3939, "step": 14443 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.947002331833243e-07, "loss": 0.424, "step": 14444 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.938757875019155e-07, "loss": 0.4088, "step": 14445 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.930517040777287e-07, "loss": 0.4138, "step": 14446 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.922279829435454e-07, "loss": 0.4545, "step": 14447 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.91404624132135e-07, "loss": 0.2837, "step": 14448 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.905816276762536e-07, "loss": 0.4208, "step": 14449 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.897589936086393e-07, "loss": 0.4447, "step": 14450 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.889367219620193e-07, "loss": 0.4251, "step": 14451 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.881148127691052e-07, "loss": 0.3363, "step": 14452 }, { "epoch": 2.61, "grad_norm": 0.0, "learning_rate": 8.872932660625933e-07, "loss": 0.408, "step": 14453 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.864720818751649e-07, "loss": 0.4121, "step": 14454 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.856512602394907e-07, "loss": 0.3711, "step": 14455 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.848308011882212e-07, "loss": 0.4681, "step": 14456 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.840107047540014e-07, "loss": 0.3496, "step": 14457 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.831909709694497e-07, "loss": 0.4263, "step": 14458 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.823715998671789e-07, "loss": 0.3836, "step": 14459 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.815525914797862e-07, "loss": 0.3643, "step": 14460 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.807339458398501e-07, "loss": 0.4202, "step": 14461 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.79915662979941e-07, "loss": 0.452, "step": 14462 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.79097742932612e-07, "loss": 0.3582, "step": 14463 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.782801857303979e-07, "loss": 0.4209, "step": 14464 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.774629914058274e-07, "loss": 0.418, "step": 14465 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.766461599914022e-07, "loss": 0.401, "step": 14466 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.758296915196251e-07, "loss": 0.4483, "step": 14467 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.750135860229725e-07, "loss": 0.4239, "step": 14468 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.741978435339116e-07, "loss": 0.3561, "step": 14469 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.733824640848943e-07, "loss": 0.4954, "step": 14470 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.725674477083568e-07, "loss": 0.3373, "step": 14471 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.7175279443672e-07, "loss": 0.4028, "step": 14472 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.709385043023988e-07, "loss": 0.3766, "step": 14473 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.70124577337782e-07, "loss": 0.492, "step": 14474 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.69311013575248e-07, "loss": 0.3968, "step": 14475 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.684978130471655e-07, "loss": 0.48, "step": 14476 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.676849757858796e-07, "loss": 0.4235, "step": 14477 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.668725018237334e-07, "loss": 0.4643, "step": 14478 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.660603911930432e-07, "loss": 0.4026, "step": 14479 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.65248643926121e-07, "loss": 0.4583, "step": 14480 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.644372600552542e-07, "loss": 0.3879, "step": 14481 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.636262396127215e-07, "loss": 0.4528, "step": 14482 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.628155826307904e-07, "loss": 0.3898, "step": 14483 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.620052891417086e-07, "loss": 0.3647, "step": 14484 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.611953591777101e-07, "loss": 0.3897, "step": 14485 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.603857927710157e-07, "loss": 0.3664, "step": 14486 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.595765899538322e-07, "loss": 0.4355, "step": 14487 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.587677507583502e-07, "loss": 0.4874, "step": 14488 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.579592752167465e-07, "loss": 0.4121, "step": 14489 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.57151163361184e-07, "loss": 0.4493, "step": 14490 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.563434152238115e-07, "loss": 0.4537, "step": 14491 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.55536030836761e-07, "loss": 0.5263, "step": 14492 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.547290102321514e-07, "loss": 0.4319, "step": 14493 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.539223534420893e-07, "loss": 0.3522, "step": 14494 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.531160604986666e-07, "loss": 0.3926, "step": 14495 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.523101314339555e-07, "loss": 0.4526, "step": 14496 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.51504566280017e-07, "loss": 0.4489, "step": 14497 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.506993650688978e-07, "loss": 0.4488, "step": 14498 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.498945278326331e-07, "loss": 0.3688, "step": 14499 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.490900546032388e-07, "loss": 0.3844, "step": 14500 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.482859454127191e-07, "loss": 0.3474, "step": 14501 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.474822002930616e-07, "loss": 0.3747, "step": 14502 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.46678819276242e-07, "loss": 0.4443, "step": 14503 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.458758023942193e-07, "loss": 0.3288, "step": 14504 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.450731496789388e-07, "loss": 0.458, "step": 14505 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.442708611623318e-07, "loss": 0.4031, "step": 14506 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.434689368763149e-07, "loss": 0.4047, "step": 14507 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.426673768527893e-07, "loss": 0.4087, "step": 14508 }, { "epoch": 2.62, "grad_norm": 0.0, "learning_rate": 8.418661811236429e-07, "loss": 0.4619, "step": 14509 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.410653497207489e-07, "loss": 0.3304, "step": 14510 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.402648826759652e-07, "loss": 0.3874, "step": 14511 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.394647800211353e-07, "loss": 0.4975, "step": 14512 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.386650417880904e-07, "loss": 0.3967, "step": 14513 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.378656680086439e-07, "loss": 0.3868, "step": 14514 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.370666587145948e-07, "loss": 0.3681, "step": 14515 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.362680139377321e-07, "loss": 0.3671, "step": 14516 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.354697337098271e-07, "loss": 0.491, "step": 14517 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.346718180626378e-07, "loss": 0.5019, "step": 14518 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.338742670279021e-07, "loss": 0.3912, "step": 14519 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.3307708063735e-07, "loss": 0.4748, "step": 14520 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.322802589226964e-07, "loss": 0.3933, "step": 14521 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.31483801915639e-07, "loss": 0.3649, "step": 14522 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.306877096478627e-07, "loss": 0.404, "step": 14523 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.298919821510365e-07, "loss": 0.4568, "step": 14524 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.290966194568173e-07, "loss": 0.348, "step": 14525 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.283016215968454e-07, "loss": 0.3838, "step": 14526 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.275069886027465e-07, "loss": 0.4645, "step": 14527 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.267127205061331e-07, "loss": 0.3921, "step": 14528 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.259188173386023e-07, "loss": 0.4267, "step": 14529 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.251252791317377e-07, "loss": 0.4366, "step": 14530 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.24332105917105e-07, "loss": 0.453, "step": 14531 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.235392977262613e-07, "loss": 0.4491, "step": 14532 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.227468545907458e-07, "loss": 0.4081, "step": 14533 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.21954776542081e-07, "loss": 0.4183, "step": 14534 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.211630636117773e-07, "loss": 0.4084, "step": 14535 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.203717158313296e-07, "loss": 0.5033, "step": 14536 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.195807332322225e-07, "loss": 0.446, "step": 14537 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.18790115845921e-07, "loss": 0.4934, "step": 14538 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.179998637038766e-07, "loss": 0.3971, "step": 14539 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.172099768375274e-07, "loss": 0.3716, "step": 14540 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.164204552782972e-07, "loss": 0.4428, "step": 14541 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.156312990575921e-07, "loss": 0.4206, "step": 14542 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.14842508206809e-07, "loss": 0.4323, "step": 14543 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.140540827573251e-07, "loss": 0.4835, "step": 14544 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.132660227405065e-07, "loss": 0.4242, "step": 14545 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.124783281877036e-07, "loss": 0.3798, "step": 14546 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.116909991302513e-07, "loss": 0.4053, "step": 14547 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.109040355994713e-07, "loss": 0.4228, "step": 14548 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.101174376266707e-07, "loss": 0.4193, "step": 14549 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.093312052431423e-07, "loss": 0.3196, "step": 14550 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.085453384801622e-07, "loss": 0.4039, "step": 14551 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.077598373689944e-07, "loss": 0.3782, "step": 14552 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.069747019408858e-07, "loss": 0.3954, "step": 14553 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.061899322270749e-07, "loss": 0.4663, "step": 14554 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.054055282587769e-07, "loss": 0.3579, "step": 14555 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.046214900672012e-07, "loss": 0.4217, "step": 14556 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.038378176835326e-07, "loss": 0.4241, "step": 14557 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.030545111389476e-07, "loss": 0.3892, "step": 14558 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.022715704646112e-07, "loss": 0.3174, "step": 14559 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.014889956916694e-07, "loss": 0.4211, "step": 14560 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 8.007067868512519e-07, "loss": 0.3839, "step": 14561 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 7.999249439744783e-07, "loss": 0.3986, "step": 14562 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 7.991434670924513e-07, "loss": 0.4229, "step": 14563 }, { "epoch": 2.63, "grad_norm": 0.0, "learning_rate": 7.983623562362585e-07, "loss": 0.4634, "step": 14564 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.97581611436975e-07, "loss": 0.4932, "step": 14565 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.968012327256591e-07, "loss": 0.4434, "step": 14566 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.960212201333561e-07, "loss": 0.45, "step": 14567 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.952415736910968e-07, "loss": 0.4779, "step": 14568 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.94462293429894e-07, "loss": 0.3853, "step": 14569 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.93683379380753e-07, "loss": 0.4483, "step": 14570 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.929048315746613e-07, "loss": 0.4527, "step": 14571 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.92126650042585e-07, "loss": 0.4075, "step": 14572 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.913488348154852e-07, "loss": 0.3387, "step": 14573 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.905713859243014e-07, "loss": 0.4447, "step": 14574 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.897943033999667e-07, "loss": 0.479, "step": 14575 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.89017587273393e-07, "loss": 0.3906, "step": 14576 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.882412375754789e-07, "loss": 0.4538, "step": 14577 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.874652543371087e-07, "loss": 0.3713, "step": 14578 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.86689637589153e-07, "loss": 0.3564, "step": 14579 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.859143873624664e-07, "loss": 0.4518, "step": 14580 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.851395036878895e-07, "loss": 0.4608, "step": 14581 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.843649865962499e-07, "loss": 0.4368, "step": 14582 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.835908361183587e-07, "loss": 0.4074, "step": 14583 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.82817052285012e-07, "loss": 0.4443, "step": 14584 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.820436351269911e-07, "loss": 0.4518, "step": 14585 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.81270584675069e-07, "loss": 0.458, "step": 14586 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.804979009599944e-07, "loss": 0.3749, "step": 14587 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.797255840125073e-07, "loss": 0.2967, "step": 14588 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.789536338633308e-07, "loss": 0.4748, "step": 14589 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.781820505431737e-07, "loss": 0.4402, "step": 14590 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.774108340827347e-07, "loss": 0.4261, "step": 14591 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.766399845126916e-07, "loss": 0.462, "step": 14592 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.758695018637108e-07, "loss": 0.4063, "step": 14593 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.750993861664446e-07, "loss": 0.4279, "step": 14594 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.743296374515241e-07, "loss": 0.4602, "step": 14595 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.73560255749577e-07, "loss": 0.3969, "step": 14596 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.727912410912097e-07, "loss": 0.4618, "step": 14597 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.720225935070125e-07, "loss": 0.4454, "step": 14598 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.712543130275651e-07, "loss": 0.3979, "step": 14599 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.70486399683431e-07, "loss": 0.3164, "step": 14600 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.697188535051591e-07, "loss": 0.3515, "step": 14601 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.689516745232839e-07, "loss": 0.4667, "step": 14602 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.681848627683242e-07, "loss": 0.396, "step": 14603 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.674184182707866e-07, "loss": 0.3702, "step": 14604 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.666523410611593e-07, "loss": 0.4555, "step": 14605 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.658866311699209e-07, "loss": 0.4009, "step": 14606 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.651212886275283e-07, "loss": 0.4822, "step": 14607 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.643563134644327e-07, "loss": 0.4505, "step": 14608 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.635917057110664e-07, "loss": 0.3495, "step": 14609 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.62827465397844e-07, "loss": 0.3727, "step": 14610 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.620635925551689e-07, "loss": 0.408, "step": 14611 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.613000872134268e-07, "loss": 0.3507, "step": 14612 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.605369494029968e-07, "loss": 0.3887, "step": 14613 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.597741791542346e-07, "loss": 0.4315, "step": 14614 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.590117764974859e-07, "loss": 0.3391, "step": 14615 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.582497414630796e-07, "loss": 0.4602, "step": 14616 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.574880740813273e-07, "loss": 0.4406, "step": 14617 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.567267743825358e-07, "loss": 0.4214, "step": 14618 }, { "epoch": 2.64, "grad_norm": 0.0, "learning_rate": 7.559658423969863e-07, "loss": 0.4993, "step": 14619 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.552052781549524e-07, "loss": 0.4616, "step": 14620 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.544450816866899e-07, "loss": 0.4681, "step": 14621 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.536852530224403e-07, "loss": 0.421, "step": 14622 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.529257921924294e-07, "loss": 0.4408, "step": 14623 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.521666992268739e-07, "loss": 0.4175, "step": 14624 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.514079741559699e-07, "loss": 0.4747, "step": 14625 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.506496170098987e-07, "loss": 0.3836, "step": 14626 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.498916278188318e-07, "loss": 0.433, "step": 14627 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.491340066129204e-07, "loss": 0.4106, "step": 14628 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.483767534223063e-07, "loss": 0.4898, "step": 14629 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.476198682771152e-07, "loss": 0.4161, "step": 14630 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.468633512074552e-07, "loss": 0.3977, "step": 14631 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.461072022434257e-07, "loss": 0.3549, "step": 14632 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.453514214150992e-07, "loss": 0.4191, "step": 14633 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.445960087525505e-07, "loss": 0.3901, "step": 14634 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.438409642858268e-07, "loss": 0.4101, "step": 14635 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.430862880449674e-07, "loss": 0.4183, "step": 14636 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.423319800599926e-07, "loss": 0.5343, "step": 14637 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.415780403609108e-07, "loss": 0.437, "step": 14638 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.408244689777155e-07, "loss": 0.4265, "step": 14639 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.400712659403841e-07, "loss": 0.3579, "step": 14640 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.393184312788815e-07, "loss": 0.4718, "step": 14641 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.385659650231558e-07, "loss": 0.3419, "step": 14642 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.378138672031421e-07, "loss": 0.5398, "step": 14643 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.370621378487597e-07, "loss": 0.4615, "step": 14644 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.363107769899114e-07, "loss": 0.4563, "step": 14645 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.355597846564922e-07, "loss": 0.4031, "step": 14646 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.348091608783769e-07, "loss": 0.4462, "step": 14647 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.340589056854241e-07, "loss": 0.4647, "step": 14648 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.333090191074821e-07, "loss": 0.4288, "step": 14649 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.325595011743791e-07, "loss": 0.4211, "step": 14650 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.318103519159381e-07, "loss": 0.4051, "step": 14651 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.310615713619573e-07, "loss": 0.3697, "step": 14652 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.303131595422263e-07, "loss": 0.3722, "step": 14653 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.295651164865192e-07, "loss": 0.512, "step": 14654 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.288174422245897e-07, "loss": 0.3495, "step": 14655 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.280701367861864e-07, "loss": 0.5051, "step": 14656 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.273232002010366e-07, "loss": 0.4168, "step": 14657 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.265766324988555e-07, "loss": 0.4414, "step": 14658 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.258304337093414e-07, "loss": 0.4046, "step": 14659 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.250846038621806e-07, "loss": 0.4696, "step": 14660 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.243391429870417e-07, "loss": 0.4034, "step": 14661 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.235940511135853e-07, "loss": 0.4245, "step": 14662 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.228493282714466e-07, "loss": 0.3969, "step": 14663 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.221049744902542e-07, "loss": 0.362, "step": 14664 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.213609897996199e-07, "loss": 0.3963, "step": 14665 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.206173742291389e-07, "loss": 0.3445, "step": 14666 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.198741278083976e-07, "loss": 0.3912, "step": 14667 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.191312505669601e-07, "loss": 0.4052, "step": 14668 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.183887425343827e-07, "loss": 0.5016, "step": 14669 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.176466037401996e-07, "loss": 0.4235, "step": 14670 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.16904834213935e-07, "loss": 0.4174, "step": 14671 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.161634339851009e-07, "loss": 0.4558, "step": 14672 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.15422403083188e-07, "loss": 0.4685, "step": 14673 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 7.146817415376784e-07, "loss": 0.4181, "step": 14674 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.13941449378035e-07, "loss": 0.405, "step": 14675 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.1320152663371e-07, "loss": 0.4114, "step": 14676 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.124619733341365e-07, "loss": 0.5053, "step": 14677 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.117227895087353e-07, "loss": 0.442, "step": 14678 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.109839751869141e-07, "loss": 0.3992, "step": 14679 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.102455303980638e-07, "loss": 0.4326, "step": 14680 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.095074551715597e-07, "loss": 0.3991, "step": 14681 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.08769749536764e-07, "loss": 0.461, "step": 14682 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.080324135230255e-07, "loss": 0.4362, "step": 14683 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.072954471596749e-07, "loss": 0.466, "step": 14684 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.065588504760324e-07, "loss": 0.3758, "step": 14685 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.058226235013987e-07, "loss": 0.3901, "step": 14686 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.050867662650606e-07, "loss": 0.4717, "step": 14687 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.043512787962958e-07, "loss": 0.4232, "step": 14688 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.036161611243619e-07, "loss": 0.4121, "step": 14689 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.028814132785022e-07, "loss": 0.4401, "step": 14690 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.021470352879467e-07, "loss": 0.4092, "step": 14691 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.014130271819097e-07, "loss": 0.3513, "step": 14692 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 7.006793889895924e-07, "loss": 0.3862, "step": 14693 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.999461207401803e-07, "loss": 0.4453, "step": 14694 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.992132224628423e-07, "loss": 0.4588, "step": 14695 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.984806941867362e-07, "loss": 0.3556, "step": 14696 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.977485359410019e-07, "loss": 0.4247, "step": 14697 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.970167477547662e-07, "loss": 0.4321, "step": 14698 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.962853296571404e-07, "loss": 0.4132, "step": 14699 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.955542816772254e-07, "loss": 0.468, "step": 14700 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.948236038440992e-07, "loss": 0.3777, "step": 14701 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.940932961868296e-07, "loss": 0.4185, "step": 14702 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.933633587344712e-07, "loss": 0.5296, "step": 14703 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.926337915160597e-07, "loss": 0.418, "step": 14704 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.919045945606229e-07, "loss": 0.4433, "step": 14705 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.911757678971664e-07, "loss": 0.4549, "step": 14706 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.904473115546862e-07, "loss": 0.391, "step": 14707 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.897192255621577e-07, "loss": 0.4577, "step": 14708 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.889915099485478e-07, "loss": 0.3538, "step": 14709 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.882641647428068e-07, "loss": 0.41, "step": 14710 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.875371899738692e-07, "loss": 0.4222, "step": 14711 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.868105856706564e-07, "loss": 0.3525, "step": 14712 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.860843518620719e-07, "loss": 0.2985, "step": 14713 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.85358488577007e-07, "loss": 0.425, "step": 14714 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.846329958443398e-07, "loss": 0.3944, "step": 14715 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.839078736929294e-07, "loss": 0.359, "step": 14716 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.831831221516228e-07, "loss": 0.3974, "step": 14717 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.824587412492522e-07, "loss": 0.4525, "step": 14718 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.81734731014635e-07, "loss": 0.3903, "step": 14719 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.810110914765722e-07, "loss": 0.4356, "step": 14720 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.802878226638532e-07, "loss": 0.4003, "step": 14721 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.795649246052516e-07, "loss": 0.4696, "step": 14722 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.788423973295233e-07, "loss": 0.4338, "step": 14723 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.781202408654119e-07, "loss": 0.3994, "step": 14724 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.773984552416458e-07, "loss": 0.4084, "step": 14725 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.766770404869405e-07, "loss": 0.4698, "step": 14726 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.759559966299944e-07, "loss": 0.3748, "step": 14727 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.752353236994924e-07, "loss": 0.4745, "step": 14728 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.745150217241026e-07, "loss": 0.468, "step": 14729 }, { "epoch": 2.66, "grad_norm": 0.0, "learning_rate": 6.737950907324808e-07, "loss": 0.4206, "step": 14730 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.730755307532666e-07, "loss": 0.4046, "step": 14731 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.723563418150869e-07, "loss": 0.3843, "step": 14732 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.7163752394655e-07, "loss": 0.4109, "step": 14733 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.70919077176253e-07, "loss": 0.3752, "step": 14734 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.702010015327764e-07, "loss": 0.4442, "step": 14735 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.694832970446874e-07, "loss": 0.4737, "step": 14736 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.687659637405352e-07, "loss": 0.4968, "step": 14737 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.680490016488616e-07, "loss": 0.4614, "step": 14738 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.673324107981838e-07, "loss": 0.4188, "step": 14739 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.666161912170099e-07, "loss": 0.4296, "step": 14740 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.659003429338329e-07, "loss": 0.4133, "step": 14741 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.651848659771287e-07, "loss": 0.3714, "step": 14742 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.644697603753635e-07, "loss": 0.4362, "step": 14743 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.637550261569848e-07, "loss": 0.3986, "step": 14744 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.630406633504261e-07, "loss": 0.4875, "step": 14745 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.623266719841026e-07, "loss": 0.4534, "step": 14746 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.616130520864194e-07, "loss": 0.3677, "step": 14747 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.608998036857684e-07, "loss": 0.4521, "step": 14748 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.601869268105221e-07, "loss": 0.3944, "step": 14749 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.594744214890392e-07, "loss": 0.3961, "step": 14750 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.587622877496658e-07, "loss": 0.3705, "step": 14751 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.580505256207303e-07, "loss": 0.4035, "step": 14752 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.573391351305492e-07, "loss": 0.4717, "step": 14753 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.566281163074217e-07, "loss": 0.3218, "step": 14754 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.559174691796332e-07, "loss": 0.4571, "step": 14755 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.552071937754567e-07, "loss": 0.3952, "step": 14756 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.54497290123145e-07, "loss": 0.4315, "step": 14757 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.5378775825094e-07, "loss": 0.4398, "step": 14758 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.530785981870702e-07, "loss": 0.3898, "step": 14759 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.523698099597476e-07, "loss": 0.3908, "step": 14760 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.51661393597166e-07, "loss": 0.3389, "step": 14761 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.509533491275088e-07, "loss": 0.3997, "step": 14762 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.502456765789411e-07, "loss": 0.4065, "step": 14763 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.495383759796192e-07, "loss": 0.4332, "step": 14764 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.488314473576795e-07, "loss": 0.4079, "step": 14765 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.481248907412429e-07, "loss": 0.3621, "step": 14766 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.474187061584203e-07, "loss": 0.3807, "step": 14767 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.467128936373024e-07, "loss": 0.4171, "step": 14768 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.460074532059691e-07, "loss": 0.3331, "step": 14769 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.453023848924833e-07, "loss": 0.4239, "step": 14770 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.44597688724895e-07, "loss": 0.4521, "step": 14771 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.438933647312362e-07, "loss": 0.3698, "step": 14772 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.431894129395277e-07, "loss": 0.4038, "step": 14773 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.424858333777728e-07, "loss": 0.365, "step": 14774 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.417826260739635e-07, "loss": 0.4424, "step": 14775 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.410797910560717e-07, "loss": 0.4474, "step": 14776 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.403773283520587e-07, "loss": 0.3828, "step": 14777 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.396752379898697e-07, "loss": 0.3897, "step": 14778 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.389735199974334e-07, "loss": 0.3797, "step": 14779 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.382721744026677e-07, "loss": 0.3981, "step": 14780 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.375712012334722e-07, "loss": 0.358, "step": 14781 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.368706005177338e-07, "loss": 0.48, "step": 14782 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.361703722833246e-07, "loss": 0.4377, "step": 14783 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.354705165580965e-07, "loss": 0.4258, "step": 14784 }, { "epoch": 2.67, "grad_norm": 0.0, "learning_rate": 6.347710333698931e-07, "loss": 0.3718, "step": 14785 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.340719227465431e-07, "loss": 0.4483, "step": 14786 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.333731847158553e-07, "loss": 0.4148, "step": 14787 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.326748193056298e-07, "loss": 0.4368, "step": 14788 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.319768265436465e-07, "loss": 0.4227, "step": 14789 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.312792064576733e-07, "loss": 0.3596, "step": 14790 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.305819590754625e-07, "loss": 0.4303, "step": 14791 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.298850844247518e-07, "loss": 0.3968, "step": 14792 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.291885825332655e-07, "loss": 0.3635, "step": 14793 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.284924534287096e-07, "loss": 0.4078, "step": 14794 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.277966971387783e-07, "loss": 0.3552, "step": 14795 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.271013136911486e-07, "loss": 0.4489, "step": 14796 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.26406303113487e-07, "loss": 0.3541, "step": 14797 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.257116654334416e-07, "loss": 0.376, "step": 14798 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.250174006786436e-07, "loss": 0.4466, "step": 14799 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.243235088767141e-07, "loss": 0.4024, "step": 14800 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.236299900552545e-07, "loss": 0.4839, "step": 14801 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.229368442418593e-07, "loss": 0.3577, "step": 14802 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.222440714640987e-07, "loss": 0.4794, "step": 14803 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.215516717495351e-07, "loss": 0.4513, "step": 14804 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.208596451257121e-07, "loss": 0.3541, "step": 14805 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.201679916201586e-07, "loss": 0.4443, "step": 14806 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.194767112603928e-07, "loss": 0.469, "step": 14807 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.187858040739114e-07, "loss": 0.4059, "step": 14808 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.180952700882026e-07, "loss": 0.4583, "step": 14809 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.174051093307365e-07, "loss": 0.4347, "step": 14810 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.167153218289667e-07, "loss": 0.4301, "step": 14811 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.160259076103359e-07, "loss": 0.4316, "step": 14812 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.15336866702273e-07, "loss": 0.4522, "step": 14813 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.14648199132184e-07, "loss": 0.425, "step": 14814 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.13959904927468e-07, "loss": 0.4818, "step": 14815 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.132719841155066e-07, "loss": 0.4788, "step": 14816 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.125844367236644e-07, "loss": 0.4354, "step": 14817 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.118972627792963e-07, "loss": 0.4735, "step": 14818 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.112104623097382e-07, "loss": 0.452, "step": 14819 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.105240353423103e-07, "loss": 0.3955, "step": 14820 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.098379819043243e-07, "loss": 0.3915, "step": 14821 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.091523020230661e-07, "loss": 0.4228, "step": 14822 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.084669957258182e-07, "loss": 0.4333, "step": 14823 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.077820630398423e-07, "loss": 0.4407, "step": 14824 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.070975039923854e-07, "loss": 0.4084, "step": 14825 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.064133186106802e-07, "loss": 0.4213, "step": 14826 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.05729506921946e-07, "loss": 0.4914, "step": 14827 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.050460689533844e-07, "loss": 0.5011, "step": 14828 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.043630047321858e-07, "loss": 0.4086, "step": 14829 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.036803142855219e-07, "loss": 0.4592, "step": 14830 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.02997997640552e-07, "loss": 0.3779, "step": 14831 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.023160548244189e-07, "loss": 0.3994, "step": 14832 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.016344858642531e-07, "loss": 0.4171, "step": 14833 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.009532907871663e-07, "loss": 0.4145, "step": 14834 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 6.0027246962026e-07, "loss": 0.3627, "step": 14835 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 5.995920223906193e-07, "loss": 0.3389, "step": 14836 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 5.989119491253104e-07, "loss": 0.4765, "step": 14837 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 5.982322498513893e-07, "loss": 0.3876, "step": 14838 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 5.975529245958933e-07, "loss": 0.361, "step": 14839 }, { "epoch": 2.68, "grad_norm": 0.0, "learning_rate": 5.96873973385852e-07, "loss": 0.3567, "step": 14840 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.961953962482714e-07, "loss": 0.4461, "step": 14841 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.955171932101478e-07, "loss": 0.4179, "step": 14842 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.948393642984607e-07, "loss": 0.4195, "step": 14843 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.941619095401763e-07, "loss": 0.3738, "step": 14844 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.93484828962243e-07, "loss": 0.4289, "step": 14845 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.928081225915983e-07, "loss": 0.4631, "step": 14846 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.921317904551615e-07, "loss": 0.4675, "step": 14847 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.914558325798392e-07, "loss": 0.3383, "step": 14848 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.907802489925218e-07, "loss": 0.368, "step": 14849 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.901050397200836e-07, "loss": 0.4207, "step": 14850 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.894302047893885e-07, "loss": 0.4702, "step": 14851 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.887557442272807e-07, "loss": 0.3549, "step": 14852 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.88081658060592e-07, "loss": 0.3749, "step": 14853 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.874079463161375e-07, "loss": 0.3591, "step": 14854 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.867346090207193e-07, "loss": 0.4308, "step": 14855 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.860616462011248e-07, "loss": 0.4684, "step": 14856 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.853890578841248e-07, "loss": 0.4595, "step": 14857 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.847168440964767e-07, "loss": 0.4004, "step": 14858 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.840450048649226e-07, "loss": 0.4135, "step": 14859 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.833735402161867e-07, "loss": 0.3894, "step": 14860 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.82702450176984e-07, "loss": 0.4323, "step": 14861 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.8203173477401e-07, "loss": 0.388, "step": 14862 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.81361394033948e-07, "loss": 0.3412, "step": 14863 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.806914279834652e-07, "loss": 0.3826, "step": 14864 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.800218366492127e-07, "loss": 0.4746, "step": 14865 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.793526200578293e-07, "loss": 0.3799, "step": 14866 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.786837782359367e-07, "loss": 0.4192, "step": 14867 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.780153112101439e-07, "loss": 0.4585, "step": 14868 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.773472190070417e-07, "loss": 0.4722, "step": 14869 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.766795016532101e-07, "loss": 0.4065, "step": 14870 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.7601215917521e-07, "loss": 0.4101, "step": 14871 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.7534519159959e-07, "loss": 0.4824, "step": 14872 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.746785989528847e-07, "loss": 0.3941, "step": 14873 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.740123812616116e-07, "loss": 0.4533, "step": 14874 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.733465385522729e-07, "loss": 0.4253, "step": 14875 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.726810708513586e-07, "loss": 0.4372, "step": 14876 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.720159781853385e-07, "loss": 0.3923, "step": 14877 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.713512605806759e-07, "loss": 0.4847, "step": 14878 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.706869180638119e-07, "loss": 0.3749, "step": 14879 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.700229506611754e-07, "loss": 0.4123, "step": 14880 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.693593583991819e-07, "loss": 0.4952, "step": 14881 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.686961413042258e-07, "loss": 0.4843, "step": 14882 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.680332994026961e-07, "loss": 0.4377, "step": 14883 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.673708327209593e-07, "loss": 0.5179, "step": 14884 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.6670874128537e-07, "loss": 0.4097, "step": 14885 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.660470251222661e-07, "loss": 0.3969, "step": 14886 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.653856842579731e-07, "loss": 0.45, "step": 14887 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.647247187187988e-07, "loss": 0.4835, "step": 14888 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.640641285310421e-07, "loss": 0.3559, "step": 14889 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.634039137209769e-07, "loss": 0.4242, "step": 14890 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.627440743148704e-07, "loss": 0.3879, "step": 14891 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.62084610338971e-07, "loss": 0.3915, "step": 14892 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.61425521819512e-07, "loss": 0.3546, "step": 14893 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.607668087827168e-07, "loss": 0.4223, "step": 14894 }, { "epoch": 2.69, "grad_norm": 0.0, "learning_rate": 5.601084712547889e-07, "loss": 0.4353, "step": 14895 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.594505092619173e-07, "loss": 0.3328, "step": 14896 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.58792922830278e-07, "loss": 0.467, "step": 14897 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.581357119860264e-07, "loss": 0.4395, "step": 14898 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.57478876755313e-07, "loss": 0.4367, "step": 14899 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.568224171642667e-07, "loss": 0.3592, "step": 14900 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.561663332389999e-07, "loss": 0.3812, "step": 14901 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.555106250056164e-07, "loss": 0.376, "step": 14902 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.548552924901984e-07, "loss": 0.4667, "step": 14903 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.542003357188174e-07, "loss": 0.3395, "step": 14904 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.535457547175294e-07, "loss": 0.4071, "step": 14905 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.528915495123733e-07, "loss": 0.3658, "step": 14906 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.52237720129375e-07, "loss": 0.4566, "step": 14907 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.515842665945447e-07, "loss": 0.4474, "step": 14908 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.509311889338776e-07, "loss": 0.3528, "step": 14909 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.502784871733569e-07, "loss": 0.403, "step": 14910 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.496261613389453e-07, "loss": 0.4175, "step": 14911 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.489742114565966e-07, "loss": 0.3451, "step": 14912 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.483226375522421e-07, "loss": 0.5199, "step": 14913 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.476714396518035e-07, "loss": 0.4375, "step": 14914 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.470206177811888e-07, "loss": 0.39, "step": 14915 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.463701719662884e-07, "loss": 0.3762, "step": 14916 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.457201022329773e-07, "loss": 0.3908, "step": 14917 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.450704086071168e-07, "loss": 0.5067, "step": 14918 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.444210911145531e-07, "loss": 0.4391, "step": 14919 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.437721497811154e-07, "loss": 0.3364, "step": 14920 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.431235846326222e-07, "loss": 0.4746, "step": 14921 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.424753956948725e-07, "loss": 0.3756, "step": 14922 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.418275829936537e-07, "loss": 0.3887, "step": 14923 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.411801465547362e-07, "loss": 0.4893, "step": 14924 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.40533086403876e-07, "loss": 0.3977, "step": 14925 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.398864025668138e-07, "loss": 0.4187, "step": 14926 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.392400950692789e-07, "loss": 0.4064, "step": 14927 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.385941639369796e-07, "loss": 0.3748, "step": 14928 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.379486091956121e-07, "loss": 0.4449, "step": 14929 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.373034308708581e-07, "loss": 0.4122, "step": 14930 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.366586289883835e-07, "loss": 0.4781, "step": 14931 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.360142035738414e-07, "loss": 0.4392, "step": 14932 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.353701546528667e-07, "loss": 0.4235, "step": 14933 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.347264822510822e-07, "loss": 0.4661, "step": 14934 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.34083186394092e-07, "loss": 0.3721, "step": 14935 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.334402671074868e-07, "loss": 0.3646, "step": 14936 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.327977244168469e-07, "loss": 0.3881, "step": 14937 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.321555583477322e-07, "loss": 0.4827, "step": 14938 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.315137689256878e-07, "loss": 0.3954, "step": 14939 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.308723561762463e-07, "loss": 0.3872, "step": 14940 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.302313201249243e-07, "loss": 0.4724, "step": 14941 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.295906607972223e-07, "loss": 0.4035, "step": 14942 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.28950378218629e-07, "loss": 0.3887, "step": 14943 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.283104724146126e-07, "loss": 0.3775, "step": 14944 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.276709434106331e-07, "loss": 0.4061, "step": 14945 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.270317912321299e-07, "loss": 0.3905, "step": 14946 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.263930159045283e-07, "loss": 0.4354, "step": 14947 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.257546174532435e-07, "loss": 0.3997, "step": 14948 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.251165959036697e-07, "loss": 0.3405, "step": 14949 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.24478951281191e-07, "loss": 0.3906, "step": 14950 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 5.238416836111715e-07, "loss": 0.4309, "step": 14951 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.23204792918961e-07, "loss": 0.376, "step": 14952 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.225682792299003e-07, "loss": 0.4361, "step": 14953 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.219321425693091e-07, "loss": 0.4164, "step": 14954 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.21296382962494e-07, "loss": 0.4642, "step": 14955 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.206610004347468e-07, "loss": 0.4376, "step": 14956 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.200259950113429e-07, "loss": 0.3582, "step": 14957 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.193913667175466e-07, "loss": 0.3537, "step": 14958 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.187571155786019e-07, "loss": 0.446, "step": 14959 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.181232416197423e-07, "loss": 0.3945, "step": 14960 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.174897448661831e-07, "loss": 0.4565, "step": 14961 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.168566253431262e-07, "loss": 0.4226, "step": 14962 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.162238830757583e-07, "loss": 0.4256, "step": 14963 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.15591518089249e-07, "loss": 0.4813, "step": 14964 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.149595304087607e-07, "loss": 0.4463, "step": 14965 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.143279200594286e-07, "loss": 0.4166, "step": 14966 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.136966870663828e-07, "loss": 0.4349, "step": 14967 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.130658314547332e-07, "loss": 0.3934, "step": 14968 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.124353532495752e-07, "loss": 0.4205, "step": 14969 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.118052524759931e-07, "loss": 0.4238, "step": 14970 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.111755291590526e-07, "loss": 0.4324, "step": 14971 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.105461833238068e-07, "loss": 0.4415, "step": 14972 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.09917214995288e-07, "loss": 0.3606, "step": 14973 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.092886241985184e-07, "loss": 0.3472, "step": 14974 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.08660410958508e-07, "loss": 0.4522, "step": 14975 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.080325753002446e-07, "loss": 0.375, "step": 14976 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.074051172487071e-07, "loss": 0.3958, "step": 14977 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.067780368288555e-07, "loss": 0.4472, "step": 14978 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.061513340656366e-07, "loss": 0.3756, "step": 14979 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.055250089839814e-07, "loss": 0.4994, "step": 14980 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.048990616088057e-07, "loss": 0.4683, "step": 14981 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.042734919650117e-07, "loss": 0.4033, "step": 14982 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.036483000774861e-07, "loss": 0.4015, "step": 14983 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.03023485971098e-07, "loss": 0.4127, "step": 14984 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.023990496707043e-07, "loss": 0.4415, "step": 14985 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.017749912011482e-07, "loss": 0.4061, "step": 14986 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.011513105872546e-07, "loss": 0.4671, "step": 14987 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 5.005280078538344e-07, "loss": 0.4096, "step": 14988 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.999050830256824e-07, "loss": 0.4465, "step": 14989 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.992825361275799e-07, "loss": 0.4821, "step": 14990 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.986603671842949e-07, "loss": 0.4162, "step": 14991 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.980385762205775e-07, "loss": 0.4401, "step": 14992 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.974171632611624e-07, "loss": 0.4906, "step": 14993 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.967961283307721e-07, "loss": 0.3893, "step": 14994 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.961754714541122e-07, "loss": 0.4702, "step": 14995 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.955551926558722e-07, "loss": 0.4124, "step": 14996 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.949352919607286e-07, "loss": 0.4133, "step": 14997 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.94315769393342e-07, "loss": 0.3321, "step": 14998 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.936966249783592e-07, "loss": 0.4364, "step": 14999 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.930778587404094e-07, "loss": 0.3856, "step": 15000 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.924594707041075e-07, "loss": 0.4436, "step": 15001 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.918414608940558e-07, "loss": 0.4761, "step": 15002 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.912238293348414e-07, "loss": 0.3489, "step": 15003 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.906065760510304e-07, "loss": 0.3705, "step": 15004 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.899897010671806e-07, "loss": 0.3849, "step": 15005 }, { "epoch": 2.71, "grad_norm": 0.0, "learning_rate": 4.893732044078303e-07, "loss": 0.3787, "step": 15006 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.887570860975088e-07, "loss": 0.3905, "step": 15007 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.881413461607232e-07, "loss": 0.4118, "step": 15008 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.875259846219704e-07, "loss": 0.4866, "step": 15009 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.86911001505731e-07, "loss": 0.3937, "step": 15010 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.862963968364653e-07, "loss": 0.5129, "step": 15011 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.856821706386283e-07, "loss": 0.3773, "step": 15012 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.850683229366538e-07, "loss": 0.4407, "step": 15013 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.844548537549609e-07, "loss": 0.3904, "step": 15014 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.838417631179559e-07, "loss": 0.4674, "step": 15015 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.832290510500271e-07, "loss": 0.3551, "step": 15016 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.826167175755503e-07, "loss": 0.4312, "step": 15017 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.82004762718884e-07, "loss": 0.4543, "step": 15018 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.813931865043731e-07, "loss": 0.4547, "step": 15019 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.807819889563481e-07, "loss": 0.4572, "step": 15020 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.80171170099123e-07, "loss": 0.4625, "step": 15021 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.795607299569971e-07, "loss": 0.4604, "step": 15022 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.789506685542533e-07, "loss": 0.4449, "step": 15023 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.783409859151634e-07, "loss": 0.3862, "step": 15024 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.777316820639822e-07, "loss": 0.4254, "step": 15025 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.771227570249459e-07, "loss": 0.4008, "step": 15026 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.765142108222798e-07, "loss": 0.4449, "step": 15027 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.759060434801921e-07, "loss": 0.4243, "step": 15028 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.752982550228791e-07, "loss": 0.4574, "step": 15029 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7469084547451804e-07, "loss": 0.4109, "step": 15030 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7408381485927304e-07, "loss": 0.3563, "step": 15031 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7347716320129135e-07, "loss": 0.4539, "step": 15032 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7287089052470925e-07, "loss": 0.5063, "step": 15033 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7226499685364413e-07, "loss": 0.4886, "step": 15034 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.716594822121989e-07, "loss": 0.3858, "step": 15035 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7105434662446214e-07, "loss": 0.4248, "step": 15036 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.7044959011450787e-07, "loss": 0.3472, "step": 15037 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.698452127063946e-07, "loss": 0.4159, "step": 15038 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.69241214424162e-07, "loss": 0.4465, "step": 15039 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6863759529184425e-07, "loss": 0.3394, "step": 15040 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.68034355333451e-07, "loss": 0.4037, "step": 15041 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.674314945729785e-07, "loss": 0.4499, "step": 15042 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6682901303441217e-07, "loss": 0.4601, "step": 15043 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6622691074171724e-07, "loss": 0.3619, "step": 15044 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6562518771885e-07, "loss": 0.3504, "step": 15045 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6502384398974586e-07, "loss": 0.4107, "step": 15046 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6442287957832677e-07, "loss": 0.5123, "step": 15047 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6382229450850357e-07, "loss": 0.4391, "step": 15048 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6322208880416164e-07, "loss": 0.3688, "step": 15049 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.626222624891852e-07, "loss": 0.4651, "step": 15050 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.620228155874329e-07, "loss": 0.4666, "step": 15051 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.614237481227524e-07, "loss": 0.4429, "step": 15052 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6082506011897563e-07, "loss": 0.4422, "step": 15053 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.6022675159991924e-07, "loss": 0.3744, "step": 15054 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.5962882258938634e-07, "loss": 0.448, "step": 15055 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.5903127311116123e-07, "loss": 0.383, "step": 15056 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.5843410318901717e-07, "loss": 0.4256, "step": 15057 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.578373128467106e-07, "loss": 0.4313, "step": 15058 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.5724090210798153e-07, "loss": 0.2971, "step": 15059 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.566448709965565e-07, "loss": 0.3864, "step": 15060 }, { "epoch": 2.72, "grad_norm": 0.0, "learning_rate": 4.5604921953614765e-07, "loss": 0.382, "step": 15061 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.554539477504505e-07, "loss": 0.4171, "step": 15062 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.5485905566314823e-07, "loss": 0.3368, "step": 15063 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.54264543297902e-07, "loss": 0.3487, "step": 15064 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.5367041067836626e-07, "loss": 0.4182, "step": 15065 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.530766578281731e-07, "loss": 0.4979, "step": 15066 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.5248328477094704e-07, "loss": 0.4693, "step": 15067 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.518902915302914e-07, "loss": 0.3994, "step": 15068 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.5129767812979617e-07, "loss": 0.4471, "step": 15069 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.5070544459303813e-07, "loss": 0.3865, "step": 15070 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.501135909435761e-07, "loss": 0.5036, "step": 15071 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4952211720495574e-07, "loss": 0.4785, "step": 15072 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.48931023400706e-07, "loss": 0.4236, "step": 15073 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4834030955434147e-07, "loss": 0.4837, "step": 15074 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.477499756893633e-07, "loss": 0.3685, "step": 15075 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.471600218292549e-07, "loss": 0.5118, "step": 15076 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4657044799748415e-07, "loss": 0.3932, "step": 15077 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4598125421750903e-07, "loss": 0.3778, "step": 15078 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4539244051276633e-07, "loss": 0.4043, "step": 15079 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4480400690667946e-07, "loss": 0.4248, "step": 15080 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4421595342265867e-07, "loss": 0.4549, "step": 15081 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.436282800840952e-07, "loss": 0.4036, "step": 15082 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.430409869143715e-07, "loss": 0.4226, "step": 15083 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4245407393684994e-07, "loss": 0.4491, "step": 15084 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.418675411748774e-07, "loss": 0.3788, "step": 15085 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.4128138865178973e-07, "loss": 0.422, "step": 15086 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.406956163909004e-07, "loss": 0.389, "step": 15087 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.401102244155175e-07, "loss": 0.3784, "step": 15088 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.395252127489258e-07, "loss": 0.4821, "step": 15089 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.389405814143999e-07, "loss": 0.3662, "step": 15090 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.383563304351968e-07, "loss": 0.4299, "step": 15091 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3777245983455897e-07, "loss": 0.4198, "step": 15092 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3718896963571345e-07, "loss": 0.384, "step": 15093 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3660585986187276e-07, "loss": 0.4531, "step": 15094 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.360231305362328e-07, "loss": 0.4016, "step": 15095 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.354407816819783e-07, "loss": 0.4221, "step": 15096 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3485881332227417e-07, "loss": 0.3593, "step": 15097 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3427722548027074e-07, "loss": 0.3696, "step": 15098 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.336960181791072e-07, "loss": 0.3764, "step": 15099 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3311519144190516e-07, "loss": 0.3709, "step": 15100 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3253474529177053e-07, "loss": 0.3672, "step": 15101 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.319546797517926e-07, "loss": 0.4324, "step": 15102 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.313749948450463e-07, "loss": 0.3895, "step": 15103 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.307956905945965e-07, "loss": 0.3655, "step": 15104 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.3021676702348803e-07, "loss": 0.4433, "step": 15105 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.296382241547492e-07, "loss": 0.4004, "step": 15106 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.2906006201139716e-07, "loss": 0.4217, "step": 15107 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.2848228061643235e-07, "loss": 0.4274, "step": 15108 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.279048799928387e-07, "loss": 0.4966, "step": 15109 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.2732786016358665e-07, "loss": 0.3868, "step": 15110 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.2675122115163225e-07, "loss": 0.3668, "step": 15111 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.261749629799128e-07, "loss": 0.3662, "step": 15112 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.255990856713554e-07, "loss": 0.3236, "step": 15113 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.2502358924886746e-07, "loss": 0.3824, "step": 15114 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.244484737353427e-07, "loss": 0.4249, "step": 15115 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.238737391536651e-07, "loss": 0.4219, "step": 15116 }, { "epoch": 2.73, "grad_norm": 0.0, "learning_rate": 4.2329938552669205e-07, "loss": 0.4172, "step": 15117 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.227254128772762e-07, "loss": 0.5089, "step": 15118 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.221518212282505e-07, "loss": 0.4519, "step": 15119 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.215786106024311e-07, "loss": 0.4259, "step": 15120 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.2100578102262534e-07, "loss": 0.365, "step": 15121 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.204333325116183e-07, "loss": 0.3964, "step": 15122 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.198612650921852e-07, "loss": 0.3495, "step": 15123 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1928957878708323e-07, "loss": 0.4616, "step": 15124 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1871827361905203e-07, "loss": 0.4153, "step": 15125 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.181473496108224e-07, "loss": 0.4538, "step": 15126 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.17576806785106e-07, "loss": 0.4493, "step": 15127 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.170066451646004e-07, "loss": 0.4289, "step": 15128 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.164368647719874e-07, "loss": 0.42, "step": 15129 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.158674656299322e-07, "loss": 0.3845, "step": 15130 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1529844776108885e-07, "loss": 0.357, "step": 15131 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.147298111880915e-07, "loss": 0.3329, "step": 15132 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.141615559335643e-07, "loss": 0.4287, "step": 15133 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.135936820201103e-07, "loss": 0.3441, "step": 15134 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.130261894703236e-07, "loss": 0.3436, "step": 15135 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1245907830677614e-07, "loss": 0.4358, "step": 15136 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.118923485520321e-07, "loss": 0.4722, "step": 15137 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1132600022863676e-07, "loss": 0.3586, "step": 15138 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1076003335911997e-07, "loss": 0.4078, "step": 15139 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.1019444796599473e-07, "loss": 0.4256, "step": 15140 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0962924407176196e-07, "loss": 0.3616, "step": 15141 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0906442169890815e-07, "loss": 0.4211, "step": 15142 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0849998086990196e-07, "loss": 0.511, "step": 15143 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0793592160719764e-07, "loss": 0.4371, "step": 15144 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.073722439332339e-07, "loss": 0.3971, "step": 15145 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.068089478704362e-07, "loss": 0.4166, "step": 15146 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0624603344121214e-07, "loss": 0.4667, "step": 15147 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0568350066795605e-07, "loss": 0.5422, "step": 15148 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0512134957304663e-07, "loss": 0.385, "step": 15149 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.04559580178846e-07, "loss": 0.4105, "step": 15150 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.039981925077041e-07, "loss": 0.3942, "step": 15151 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0343718658195195e-07, "loss": 0.4091, "step": 15152 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0287656242390837e-07, "loss": 0.4975, "step": 15153 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.023163200558777e-07, "loss": 0.4057, "step": 15154 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.017564595001444e-07, "loss": 0.4108, "step": 15155 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.011969807789817e-07, "loss": 0.3815, "step": 15156 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.006378839146463e-07, "loss": 0.4031, "step": 15157 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 4.0007916892938034e-07, "loss": 0.3985, "step": 15158 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.995208358454106e-07, "loss": 0.3166, "step": 15159 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9896288468494917e-07, "loss": 0.3727, "step": 15160 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9840531547019167e-07, "loss": 0.4182, "step": 15161 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.978481282233204e-07, "loss": 0.4064, "step": 15162 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9729132296649764e-07, "loss": 0.4422, "step": 15163 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9673489972187786e-07, "loss": 0.4228, "step": 15164 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9617885851159554e-07, "loss": 0.3735, "step": 15165 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.956231993577697e-07, "loss": 0.4427, "step": 15166 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.95067922282506e-07, "loss": 0.4411, "step": 15167 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9451302730789454e-07, "loss": 0.4264, "step": 15168 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9395851445600985e-07, "loss": 0.4693, "step": 15169 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.934043837489121e-07, "loss": 0.4085, "step": 15170 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9285063520864476e-07, "loss": 0.4672, "step": 15171 }, { "epoch": 2.74, "grad_norm": 0.0, "learning_rate": 3.9229726885723686e-07, "loss": 0.3381, "step": 15172 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.91744284716703e-07, "loss": 0.4203, "step": 15173 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.911916828090401e-07, "loss": 0.4291, "step": 15174 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.906394631562338e-07, "loss": 0.4368, "step": 15175 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.9008762578025105e-07, "loss": 0.3742, "step": 15176 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.895361707030476e-07, "loss": 0.4126, "step": 15177 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8898509794655704e-07, "loss": 0.3518, "step": 15178 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8843440753270176e-07, "loss": 0.4567, "step": 15179 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8788409948339323e-07, "loss": 0.3689, "step": 15180 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.873341738205216e-07, "loss": 0.4036, "step": 15181 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8678463056596506e-07, "loss": 0.364, "step": 15182 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8623546974158266e-07, "loss": 0.5259, "step": 15183 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8568669136922364e-07, "loss": 0.4207, "step": 15184 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8513829547071725e-07, "loss": 0.3932, "step": 15185 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8459028206788153e-07, "loss": 0.4153, "step": 15186 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.840426511825157e-07, "loss": 0.4194, "step": 15187 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.834954028364069e-07, "loss": 0.4485, "step": 15188 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.829485370513253e-07, "loss": 0.4157, "step": 15189 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.824020538490236e-07, "loss": 0.4218, "step": 15190 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.818559532512467e-07, "loss": 0.4373, "step": 15191 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.81310235279716e-07, "loss": 0.3905, "step": 15192 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.80764899956142e-07, "loss": 0.4564, "step": 15193 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.8021994730221947e-07, "loss": 0.3819, "step": 15194 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.7967537733962446e-07, "loss": 0.5044, "step": 15195 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.791311900900252e-07, "loss": 0.4396, "step": 15196 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.785873855750688e-07, "loss": 0.4254, "step": 15197 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.78043963816388e-07, "loss": 0.3228, "step": 15198 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.775009248356032e-07, "loss": 0.4036, "step": 15199 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.7695826865431387e-07, "loss": 0.4578, "step": 15200 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.7641599529411155e-07, "loss": 0.3508, "step": 15201 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.7587410477656574e-07, "loss": 0.4458, "step": 15202 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.753325971232358e-07, "loss": 0.373, "step": 15203 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.747914723556623e-07, "loss": 0.4215, "step": 15204 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.742507304953735e-07, "loss": 0.4717, "step": 15205 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.737103715638812e-07, "loss": 0.3761, "step": 15206 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.731703955826804e-07, "loss": 0.4145, "step": 15207 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.726308025732528e-07, "loss": 0.3359, "step": 15208 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.720915925570645e-07, "loss": 0.4436, "step": 15209 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.715527655555662e-07, "loss": 0.4366, "step": 15210 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.710143215901929e-07, "loss": 0.3492, "step": 15211 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.704762606823653e-07, "loss": 0.3971, "step": 15212 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.699385828534885e-07, "loss": 0.4302, "step": 15213 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.69401288124952e-07, "loss": 0.4879, "step": 15214 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.688643765181321e-07, "loss": 0.4116, "step": 15215 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.6832784805438507e-07, "loss": 0.4569, "step": 15216 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.677917027550548e-07, "loss": 0.481, "step": 15217 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.672559406414733e-07, "loss": 0.4045, "step": 15218 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.667205617349512e-07, "loss": 0.4733, "step": 15219 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.6618556605678925e-07, "loss": 0.4133, "step": 15220 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.6565095362826817e-07, "loss": 0.3759, "step": 15221 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.651167244706566e-07, "loss": 0.4268, "step": 15222 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.645828786052075e-07, "loss": 0.4433, "step": 15223 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.6404941605315823e-07, "loss": 0.4956, "step": 15224 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.6351633683572973e-07, "loss": 0.3941, "step": 15225 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.629836409741294e-07, "loss": 0.4219, "step": 15226 }, { "epoch": 2.75, "grad_norm": 0.0, "learning_rate": 3.6245132848955033e-07, "loss": 0.3554, "step": 15227 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.6191939940316555e-07, "loss": 0.3869, "step": 15228 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.6138785373613815e-07, "loss": 0.3462, "step": 15229 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.6085669150961677e-07, "loss": 0.5421, "step": 15230 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.603259127447267e-07, "loss": 0.4388, "step": 15231 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.5979551746258557e-07, "loss": 0.3758, "step": 15232 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.59265505684292e-07, "loss": 0.5067, "step": 15233 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.5873587743093354e-07, "loss": 0.3987, "step": 15234 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.5820663272357894e-07, "loss": 0.3624, "step": 15235 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.576777715832813e-07, "loss": 0.3858, "step": 15236 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.571492940310806e-07, "loss": 0.357, "step": 15237 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.566212000879987e-07, "loss": 0.3503, "step": 15238 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.5609348977504675e-07, "loss": 0.4327, "step": 15239 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.555661631132157e-07, "loss": 0.4803, "step": 15240 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.5503922012348534e-07, "loss": 0.4689, "step": 15241 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.545126608268168e-07, "loss": 0.4344, "step": 15242 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.539864852441588e-07, "loss": 0.459, "step": 15243 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.534606933964435e-07, "loss": 0.4883, "step": 15244 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.529352853045864e-07, "loss": 0.4118, "step": 15245 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.524102609894897e-07, "loss": 0.429, "step": 15246 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.518856204720411e-07, "loss": 0.4246, "step": 15247 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.513613637731095e-07, "loss": 0.3728, "step": 15248 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.5083749091355255e-07, "loss": 0.4569, "step": 15249 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.503140019142093e-07, "loss": 0.407, "step": 15250 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.497908967959063e-07, "loss": 0.397, "step": 15251 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.492681755794536e-07, "loss": 0.4466, "step": 15252 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.487458382856468e-07, "loss": 0.3334, "step": 15253 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.482238849352626e-07, "loss": 0.3927, "step": 15254 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4770231554906555e-07, "loss": 0.4351, "step": 15255 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4718113014780676e-07, "loss": 0.3946, "step": 15256 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4666032875221966e-07, "loss": 0.4341, "step": 15257 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4613991138302104e-07, "loss": 0.3025, "step": 15258 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4561987806091435e-07, "loss": 0.3484, "step": 15259 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4510022880658857e-07, "loss": 0.4923, "step": 15260 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.445809636407149e-07, "loss": 0.489, "step": 15261 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4406208258395135e-07, "loss": 0.4042, "step": 15262 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.435435856569391e-07, "loss": 0.3863, "step": 15263 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.430254728803062e-07, "loss": 0.4334, "step": 15264 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.425077442746627e-07, "loss": 0.4595, "step": 15265 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4199039986060556e-07, "loss": 0.3974, "step": 15266 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4147343965871495e-07, "loss": 0.3844, "step": 15267 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.4095686368955883e-07, "loss": 0.3758, "step": 15268 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.404406719736841e-07, "loss": 0.3456, "step": 15269 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3992486453162774e-07, "loss": 0.4283, "step": 15270 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3940944138390665e-07, "loss": 0.3827, "step": 15271 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.388944025510299e-07, "loss": 0.4429, "step": 15272 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.383797480534834e-07, "loss": 0.4022, "step": 15273 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3786547791174187e-07, "loss": 0.3292, "step": 15274 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.373515921462645e-07, "loss": 0.409, "step": 15275 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3683809077749265e-07, "loss": 0.3708, "step": 15276 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.363249738258556e-07, "loss": 0.4809, "step": 15277 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3581224131176706e-07, "loss": 0.3461, "step": 15278 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3529989325562176e-07, "loss": 0.4264, "step": 15279 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.347879296778045e-07, "loss": 0.4291, "step": 15280 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3427635059868013e-07, "loss": 0.3959, "step": 15281 }, { "epoch": 2.76, "grad_norm": 0.0, "learning_rate": 3.3376515603860126e-07, "loss": 0.4059, "step": 15282 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.332543460179027e-07, "loss": 0.3669, "step": 15283 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.327439205569072e-07, "loss": 0.4228, "step": 15284 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.322338796759195e-07, "loss": 0.4511, "step": 15285 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.3172422339523005e-07, "loss": 0.4787, "step": 15286 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.3121495173511266e-07, "loss": 0.4568, "step": 15287 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.307060647158278e-07, "loss": 0.4157, "step": 15288 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.3019756235762147e-07, "loss": 0.4375, "step": 15289 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.296894446807219e-07, "loss": 0.388, "step": 15290 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2918171170534085e-07, "loss": 0.4426, "step": 15291 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2867436345167867e-07, "loss": 0.3762, "step": 15292 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.281673999399171e-07, "loss": 0.3425, "step": 15293 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.276608211902266e-07, "loss": 0.3461, "step": 15294 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2715462722275883e-07, "loss": 0.3638, "step": 15295 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2664881805764883e-07, "loss": 0.4265, "step": 15296 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2614339371502155e-07, "loss": 0.4836, "step": 15297 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2563835421498216e-07, "loss": 0.3771, "step": 15298 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.251336995776222e-07, "loss": 0.5181, "step": 15299 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.246294298230168e-07, "loss": 0.447, "step": 15300 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2412554497122774e-07, "loss": 0.4196, "step": 15301 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.236220450423011e-07, "loss": 0.5079, "step": 15302 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2311893005626536e-07, "loss": 0.3784, "step": 15303 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2261620003313455e-07, "loss": 0.3985, "step": 15304 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2211385499291147e-07, "loss": 0.4561, "step": 15305 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.216118949555791e-07, "loss": 0.3823, "step": 15306 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2111031994110473e-07, "loss": 0.4322, "step": 15307 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.2060912996944247e-07, "loss": 0.4693, "step": 15308 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.201083250605297e-07, "loss": 0.453, "step": 15309 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1960790523429044e-07, "loss": 0.4906, "step": 15310 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.191078705106332e-07, "loss": 0.434, "step": 15311 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1860822090944876e-07, "loss": 0.4579, "step": 15312 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1810895645061566e-07, "loss": 0.4402, "step": 15313 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.176100771539925e-07, "loss": 0.39, "step": 15314 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.171115830394278e-07, "loss": 0.3983, "step": 15315 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1661347412675237e-07, "loss": 0.4997, "step": 15316 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1611575043578145e-07, "loss": 0.443, "step": 15317 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1561841198631594e-07, "loss": 0.3462, "step": 15318 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1512145879813996e-07, "loss": 0.3751, "step": 15319 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.146248908910221e-07, "loss": 0.4535, "step": 15320 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1412870828471995e-07, "loss": 0.472, "step": 15321 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1363291099896996e-07, "loss": 0.3587, "step": 15322 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.131374990534963e-07, "loss": 0.4684, "step": 15323 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.126424724680077e-07, "loss": 0.441, "step": 15324 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1214783126219504e-07, "loss": 0.441, "step": 15325 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1165357545573926e-07, "loss": 0.3925, "step": 15326 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.111597050683024e-07, "loss": 0.4518, "step": 15327 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.1066622011952986e-07, "loss": 0.4131, "step": 15328 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.101731206290537e-07, "loss": 0.5296, "step": 15329 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0968040661648937e-07, "loss": 0.4076, "step": 15330 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0918807810144e-07, "loss": 0.3707, "step": 15331 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0869613510348896e-07, "loss": 0.4002, "step": 15332 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0820457764220937e-07, "loss": 0.3749, "step": 15333 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0771340573715336e-07, "loss": 0.3541, "step": 15334 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0722261940786314e-07, "loss": 0.39, "step": 15335 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.067322186738608e-07, "loss": 0.3464, "step": 15336 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.062422035546564e-07, "loss": 0.4817, "step": 15337 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 3.0575257406974423e-07, "loss": 0.3908, "step": 15338 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.05263330238601e-07, "loss": 0.3514, "step": 15339 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.0477447208069113e-07, "loss": 0.3711, "step": 15340 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.0428599961546126e-07, "loss": 0.4525, "step": 15341 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.037979128623425e-07, "loss": 0.4137, "step": 15342 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.0331021184075607e-07, "loss": 0.3073, "step": 15343 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.028228965700997e-07, "loss": 0.3373, "step": 15344 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.0233596706976007e-07, "loss": 0.3786, "step": 15345 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.018494233591096e-07, "loss": 0.4684, "step": 15346 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.0136326545750047e-07, "loss": 0.3524, "step": 15347 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.008774933842773e-07, "loss": 0.3186, "step": 15348 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 3.0039210715876343e-07, "loss": 0.4032, "step": 15349 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.999071068002679e-07, "loss": 0.3997, "step": 15350 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.994224923280853e-07, "loss": 0.4174, "step": 15351 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9893826376149125e-07, "loss": 0.4136, "step": 15352 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9845442111975376e-07, "loss": 0.4763, "step": 15353 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.979709644221196e-07, "loss": 0.3525, "step": 15354 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9748789368782006e-07, "loss": 0.3928, "step": 15355 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.970052089360731e-07, "loss": 0.4457, "step": 15356 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9652291018608226e-07, "loss": 0.5242, "step": 15357 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.960409974570311e-07, "loss": 0.4039, "step": 15358 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.955594707680953e-07, "loss": 0.3851, "step": 15359 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9507833013842747e-07, "loss": 0.4989, "step": 15360 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9459757558716885e-07, "loss": 0.506, "step": 15361 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9411720713344525e-07, "loss": 0.4685, "step": 15362 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9363722479636483e-07, "loss": 0.3936, "step": 15363 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.931576285950244e-07, "loss": 0.2873, "step": 15364 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.926784185485032e-07, "loss": 0.3949, "step": 15365 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.921995946758638e-07, "loss": 0.362, "step": 15366 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9172115699615535e-07, "loss": 0.418, "step": 15367 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.9124310552840816e-07, "loss": 0.4538, "step": 15368 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.907654402916438e-07, "loss": 0.4937, "step": 15369 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.902881613048636e-07, "loss": 0.4669, "step": 15370 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8981126858705357e-07, "loss": 0.4003, "step": 15371 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8933476215718515e-07, "loss": 0.4221, "step": 15372 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.888586420342154e-07, "loss": 0.449, "step": 15373 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.883829082370859e-07, "loss": 0.4403, "step": 15374 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8790756078472036e-07, "loss": 0.4031, "step": 15375 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.874325996960303e-07, "loss": 0.396, "step": 15376 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8695802498990957e-07, "loss": 0.4337, "step": 15377 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8648383668523737e-07, "loss": 0.4487, "step": 15378 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8601003480087984e-07, "loss": 0.5395, "step": 15379 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.855366193556819e-07, "loss": 0.4141, "step": 15380 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8506359036848176e-07, "loss": 0.418, "step": 15381 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8459094785809327e-07, "loss": 0.5128, "step": 15382 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.841186918433203e-07, "loss": 0.4455, "step": 15383 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8364682234295e-07, "loss": 0.3899, "step": 15384 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.8317533937575303e-07, "loss": 0.3983, "step": 15385 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.827042429604876e-07, "loss": 0.4184, "step": 15386 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.822335331158954e-07, "loss": 0.4403, "step": 15387 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.817632098606993e-07, "loss": 0.3384, "step": 15388 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.81293273213612e-07, "loss": 0.4286, "step": 15389 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.808237231933264e-07, "loss": 0.3773, "step": 15390 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.803545598185231e-07, "loss": 0.425, "step": 15391 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.7988578310786605e-07, "loss": 0.3732, "step": 15392 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 2.794173930800048e-07, "loss": 0.4551, "step": 15393 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.789493897535711e-07, "loss": 0.4444, "step": 15394 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.784817731471845e-07, "loss": 0.336, "step": 15395 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7801454327944676e-07, "loss": 0.4015, "step": 15396 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.775477001689442e-07, "loss": 0.4359, "step": 15397 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7708124383425074e-07, "loss": 0.3634, "step": 15398 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.766151742939216e-07, "loss": 0.4583, "step": 15399 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.761494915664975e-07, "loss": 0.4182, "step": 15400 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.756841956705036e-07, "loss": 0.3642, "step": 15401 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7521928662445186e-07, "loss": 0.3829, "step": 15402 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7475476444683626e-07, "loss": 0.4733, "step": 15403 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7429062915613756e-07, "loss": 0.3993, "step": 15404 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7382688077081774e-07, "loss": 0.4565, "step": 15405 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.733635193093265e-07, "loss": 0.3169, "step": 15406 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7290054479009675e-07, "loss": 0.3786, "step": 15407 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.724379572315472e-07, "loss": 0.3458, "step": 15408 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.719757566520798e-07, "loss": 0.4106, "step": 15409 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7151394307008195e-07, "loss": 0.3866, "step": 15410 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7105251650392574e-07, "loss": 0.4469, "step": 15411 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.7059147697196644e-07, "loss": 0.4452, "step": 15412 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.701308244925449e-07, "loss": 0.4836, "step": 15413 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6967055908398874e-07, "loss": 0.4176, "step": 15414 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.692106807646067e-07, "loss": 0.3894, "step": 15415 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6875118955269286e-07, "loss": 0.3861, "step": 15416 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.682920854665261e-07, "loss": 0.4463, "step": 15417 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.678333685243728e-07, "loss": 0.5271, "step": 15418 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6737503874448066e-07, "loss": 0.4396, "step": 15419 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.669170961450829e-07, "loss": 0.4329, "step": 15420 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6645954074439483e-07, "loss": 0.3636, "step": 15421 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.660023725606209e-07, "loss": 0.4248, "step": 15422 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.655455916119487e-07, "loss": 0.3953, "step": 15423 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.650891979165493e-07, "loss": 0.5559, "step": 15424 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.646331914925782e-07, "loss": 0.334, "step": 15425 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.641775723581763e-07, "loss": 0.4788, "step": 15426 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.637223405314682e-07, "loss": 0.3119, "step": 15427 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.632674960305648e-07, "loss": 0.3788, "step": 15428 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6281303887356167e-07, "loss": 0.4579, "step": 15429 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.623589690785355e-07, "loss": 0.3944, "step": 15430 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6190528666355073e-07, "loss": 0.4352, "step": 15431 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6145199164665626e-07, "loss": 0.3596, "step": 15432 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.6099908404588534e-07, "loss": 0.435, "step": 15433 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.605465638792537e-07, "loss": 0.4975, "step": 15434 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.600944311647646e-07, "loss": 0.4556, "step": 15435 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.5964268592040486e-07, "loss": 0.3963, "step": 15436 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.591913281641445e-07, "loss": 0.3379, "step": 15437 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.5874035791394026e-07, "loss": 0.4633, "step": 15438 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.5828977518773225e-07, "loss": 0.3946, "step": 15439 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.57839580003445e-07, "loss": 0.4258, "step": 15440 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.573897723789898e-07, "loss": 0.3743, "step": 15441 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.5694035233226e-07, "loss": 0.3903, "step": 15442 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.564913198811325e-07, "loss": 0.3885, "step": 15443 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.560426750434708e-07, "loss": 0.4379, "step": 15444 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.55594417837125e-07, "loss": 0.5512, "step": 15445 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.5514654827992647e-07, "loss": 0.4153, "step": 15446 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.54699066389692e-07, "loss": 0.4898, "step": 15447 }, { "epoch": 2.79, "grad_norm": 0.0, "learning_rate": 2.5425197218422294e-07, "loss": 0.4673, "step": 15448 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.5380526568130613e-07, "loss": 0.4871, "step": 15449 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.53358946898713e-07, "loss": 0.4136, "step": 15450 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.529130158541959e-07, "loss": 0.3492, "step": 15451 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.5246747256549854e-07, "loss": 0.4219, "step": 15452 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.520223170503422e-07, "loss": 0.3716, "step": 15453 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.515775493264383e-07, "loss": 0.4186, "step": 15454 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.5113316941147824e-07, "loss": 0.3468, "step": 15455 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.5068917732314234e-07, "loss": 0.4763, "step": 15456 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.502455730790931e-07, "loss": 0.4386, "step": 15457 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.498023566969765e-07, "loss": 0.4789, "step": 15458 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.49359528194425e-07, "loss": 0.3733, "step": 15459 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.489170875890545e-07, "loss": 0.4902, "step": 15460 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.484750348984677e-07, "loss": 0.3988, "step": 15461 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.480333701402493e-07, "loss": 0.4046, "step": 15462 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4759209333196977e-07, "loss": 0.3978, "step": 15463 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4715120449118277e-07, "loss": 0.418, "step": 15464 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.467107036354288e-07, "loss": 0.3728, "step": 15465 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.462705907822316e-07, "loss": 0.39, "step": 15466 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4583086594909934e-07, "loss": 0.3966, "step": 15467 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.453915291535247e-07, "loss": 0.4006, "step": 15468 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4495258041298597e-07, "loss": 0.3386, "step": 15469 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.445140197449447e-07, "loss": 0.4585, "step": 15470 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4407584716684804e-07, "loss": 0.478, "step": 15471 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4363806269612655e-07, "loss": 0.4068, "step": 15472 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4320066635019734e-07, "loss": 0.392, "step": 15473 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4276365814645874e-07, "loss": 0.3608, "step": 15474 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.4232703810229794e-07, "loss": 0.3781, "step": 15475 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.418908062350833e-07, "loss": 0.3566, "step": 15476 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.414549625621676e-07, "loss": 0.4916, "step": 15477 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.410195071008914e-07, "loss": 0.436, "step": 15478 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.405844398685775e-07, "loss": 0.4341, "step": 15479 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.401497608825343e-07, "loss": 0.3406, "step": 15480 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3971547016005345e-07, "loss": 0.3786, "step": 15481 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3928156771840906e-07, "loss": 0.3979, "step": 15482 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3884805357486604e-07, "loss": 0.4519, "step": 15483 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3841492774666964e-07, "loss": 0.3932, "step": 15484 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.379821902510504e-07, "loss": 0.4602, "step": 15485 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.375498411052224e-07, "loss": 0.4648, "step": 15486 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3711788032638517e-07, "loss": 0.3855, "step": 15487 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3668630793172386e-07, "loss": 0.3512, "step": 15488 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.36255123938407e-07, "loss": 0.4381, "step": 15489 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.358243283635875e-07, "loss": 0.4439, "step": 15490 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3539392122440275e-07, "loss": 0.4125, "step": 15491 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3496390253797575e-07, "loss": 0.4207, "step": 15492 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.345342723214128e-07, "loss": 0.4252, "step": 15493 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.341050305918058e-07, "loss": 0.4404, "step": 15494 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3367617736623104e-07, "loss": 0.4628, "step": 15495 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.332477126617483e-07, "loss": 0.4041, "step": 15496 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3281963649540384e-07, "loss": 0.4522, "step": 15497 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.3239194888422413e-07, "loss": 0.3919, "step": 15498 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.319646498452266e-07, "loss": 0.343, "step": 15499 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.315377393954099e-07, "loss": 0.4041, "step": 15500 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.311112175517549e-07, "loss": 0.3961, "step": 15501 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.306850843312325e-07, "loss": 0.4102, "step": 15502 }, { "epoch": 2.8, "grad_norm": 0.0, "learning_rate": 2.302593397507913e-07, "loss": 0.5082, "step": 15503 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2983398382737e-07, "loss": 0.3997, "step": 15504 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.294090165778906e-07, "loss": 0.386, "step": 15505 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2898443801925963e-07, "loss": 0.4895, "step": 15506 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2856024816836464e-07, "loss": 0.4406, "step": 15507 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2813644704208328e-07, "loss": 0.4318, "step": 15508 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2771303465727312e-07, "loss": 0.4527, "step": 15509 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2729001103078184e-07, "loss": 0.3902, "step": 15510 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2686737617943377e-07, "loss": 0.4354, "step": 15511 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.264451301200432e-07, "loss": 0.5151, "step": 15512 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2602327286941007e-07, "loss": 0.407, "step": 15513 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2560180444431202e-07, "loss": 0.4394, "step": 15514 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.251807248615212e-07, "loss": 0.4189, "step": 15515 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2476003413778537e-07, "loss": 0.4239, "step": 15516 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2433973228984217e-07, "loss": 0.4865, "step": 15517 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2391981933441053e-07, "loss": 0.4354, "step": 15518 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2350029528819594e-07, "loss": 0.4373, "step": 15519 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2308116016788729e-07, "loss": 0.4673, "step": 15520 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2266241399015896e-07, "loss": 0.3519, "step": 15521 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2224405677166994e-07, "loss": 0.3725, "step": 15522 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.218260885290613e-07, "loss": 0.3708, "step": 15523 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2140850927896306e-07, "loss": 0.3756, "step": 15524 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2099131903798531e-07, "loss": 0.4371, "step": 15525 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.2057451782272588e-07, "loss": 0.3365, "step": 15526 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.201581056497648e-07, "loss": 0.4001, "step": 15527 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1974208253566775e-07, "loss": 0.3984, "step": 15528 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.193264484969859e-07, "loss": 0.4372, "step": 15529 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1891120355025276e-07, "loss": 0.4929, "step": 15530 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1849634771198724e-07, "loss": 0.4137, "step": 15531 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1808188099869398e-07, "loss": 0.3666, "step": 15532 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.17667803426862e-07, "loss": 0.4379, "step": 15533 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1725411501296145e-07, "loss": 0.4686, "step": 15534 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.168408157734525e-07, "loss": 0.4228, "step": 15535 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1642790572477312e-07, "loss": 0.3982, "step": 15536 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.160153848833535e-07, "loss": 0.4598, "step": 15537 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.156032532656016e-07, "loss": 0.4063, "step": 15538 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1519151088791545e-07, "loss": 0.372, "step": 15539 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1478015776667303e-07, "loss": 0.3592, "step": 15540 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1436919391823797e-07, "loss": 0.4303, "step": 15541 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1395861935896045e-07, "loss": 0.3881, "step": 15542 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.13548434105173e-07, "loss": 0.4394, "step": 15543 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.131386381731948e-07, "loss": 0.4094, "step": 15544 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1272923157932722e-07, "loss": 0.4649, "step": 15545 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1232021433985727e-07, "loss": 0.4597, "step": 15546 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1191158647105525e-07, "loss": 0.4799, "step": 15547 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1150334798918037e-07, "loss": 0.4674, "step": 15548 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.1109549891047077e-07, "loss": 0.4718, "step": 15549 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.106880392511501e-07, "loss": 0.415, "step": 15550 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.102809690274299e-07, "loss": 0.3981, "step": 15551 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.098742882555027e-07, "loss": 0.4639, "step": 15552 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0946799695154897e-07, "loss": 0.3804, "step": 15553 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0906209513173014e-07, "loss": 0.4388, "step": 15554 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0865658281219446e-07, "loss": 0.4705, "step": 15555 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0825146000907236e-07, "loss": 0.4283, "step": 15556 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0784672673848094e-07, "loss": 0.4003, "step": 15557 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0744238301652286e-07, "loss": 0.4098, "step": 15558 }, { "epoch": 2.81, "grad_norm": 0.0, "learning_rate": 2.0703842885928194e-07, "loss": 0.3387, "step": 15559 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0663486428282865e-07, "loss": 0.3787, "step": 15560 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.062316893032168e-07, "loss": 0.3486, "step": 15561 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.058289039364869e-07, "loss": 0.4131, "step": 15562 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0542650819866172e-07, "loss": 0.4165, "step": 15563 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0502450210574843e-07, "loss": 0.3684, "step": 15564 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0462288567373977e-07, "loss": 0.4049, "step": 15565 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0422165891861413e-07, "loss": 0.3562, "step": 15566 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0382082185633201e-07, "loss": 0.3655, "step": 15567 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0342037450283847e-07, "loss": 0.4854, "step": 15568 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.030203168740641e-07, "loss": 0.4404, "step": 15569 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0262064898592615e-07, "loss": 0.3595, "step": 15570 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0222137085432414e-07, "loss": 0.4057, "step": 15571 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.018224824951387e-07, "loss": 0.4567, "step": 15572 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0142398392423935e-07, "loss": 0.4459, "step": 15573 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0102587515748006e-07, "loss": 0.4505, "step": 15574 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.006281562106993e-07, "loss": 0.3317, "step": 15575 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 2.0023082709971663e-07, "loss": 0.3191, "step": 15576 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9983388784034052e-07, "loss": 0.4598, "step": 15577 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9943733844836056e-07, "loss": 0.4622, "step": 15578 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.990411789395519e-07, "loss": 0.4579, "step": 15579 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9864540932967636e-07, "loss": 0.38, "step": 15580 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9825002963447582e-07, "loss": 0.4384, "step": 15581 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9785503986968102e-07, "loss": 0.4413, "step": 15582 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9746044005100497e-07, "loss": 0.3743, "step": 15583 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9706623019414505e-07, "loss": 0.4289, "step": 15584 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9667241031478323e-07, "loss": 0.3888, "step": 15585 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9627898042858807e-07, "loss": 0.4253, "step": 15586 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9588594055121035e-07, "loss": 0.4104, "step": 15587 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9549329069828426e-07, "loss": 0.4137, "step": 15588 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9510103088543064e-07, "loss": 0.3858, "step": 15589 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9470916112825477e-07, "loss": 0.3911, "step": 15590 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9431768144234643e-07, "loss": 0.3825, "step": 15591 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9392659184327867e-07, "loss": 0.3493, "step": 15592 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.935358923466102e-07, "loss": 0.4139, "step": 15593 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.93145582967883e-07, "loss": 0.3992, "step": 15594 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9275566372262356e-07, "loss": 0.4998, "step": 15595 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9236613462634614e-07, "loss": 0.3372, "step": 15596 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9197699569454497e-07, "loss": 0.3731, "step": 15597 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9158824694269995e-07, "loss": 0.3601, "step": 15598 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9119988838627866e-07, "loss": 0.4302, "step": 15599 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.908119200407288e-07, "loss": 0.4436, "step": 15600 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9042434192148352e-07, "loss": 0.4267, "step": 15601 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.9003715404396384e-07, "loss": 0.3507, "step": 15602 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8965035642357188e-07, "loss": 0.4595, "step": 15603 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8926394907569422e-07, "loss": 0.4162, "step": 15604 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8887793201570415e-07, "loss": 0.5189, "step": 15605 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.884923052589549e-07, "loss": 0.431, "step": 15606 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8810706882079199e-07, "loss": 0.4415, "step": 15607 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8772222271653874e-07, "loss": 0.3841, "step": 15608 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.873377669615051e-07, "loss": 0.4545, "step": 15609 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8695370157098435e-07, "loss": 0.3987, "step": 15610 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8657002656025435e-07, "loss": 0.4138, "step": 15611 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8618674194458175e-07, "loss": 0.4059, "step": 15612 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8580384773921324e-07, "loss": 0.3972, "step": 15613 }, { "epoch": 2.82, "grad_norm": 0.0, "learning_rate": 1.8542134395937882e-07, "loss": 0.3924, "step": 15614 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8503923062029748e-07, "loss": 0.4734, "step": 15615 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8465750773716928e-07, "loss": 0.5004, "step": 15616 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8427617532518093e-07, "loss": 0.421, "step": 15617 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8389523339950033e-07, "loss": 0.4092, "step": 15618 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.835146819752842e-07, "loss": 0.3821, "step": 15619 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8313452106766937e-07, "loss": 0.413, "step": 15620 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.827547506917815e-07, "loss": 0.4175, "step": 15621 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.823753708627274e-07, "loss": 0.4382, "step": 15622 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.819963815955983e-07, "loss": 0.4571, "step": 15623 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.816177829054744e-07, "loss": 0.4072, "step": 15624 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8123957480741472e-07, "loss": 0.4062, "step": 15625 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8086175731646394e-07, "loss": 0.4448, "step": 15626 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8048433044765445e-07, "loss": 0.4556, "step": 15627 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.8010729421599872e-07, "loss": 0.4186, "step": 15628 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7973064863649804e-07, "loss": 0.391, "step": 15629 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7935439372413599e-07, "loss": 0.495, "step": 15630 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.789785294938806e-07, "loss": 0.4208, "step": 15631 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7860305596068218e-07, "loss": 0.368, "step": 15632 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.782279731394787e-07, "loss": 0.4654, "step": 15633 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7785328104519274e-07, "loss": 0.3684, "step": 15634 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7747897969272898e-07, "loss": 0.384, "step": 15635 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7710506909697888e-07, "loss": 0.3944, "step": 15636 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7673154927281722e-07, "loss": 0.3792, "step": 15637 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7635842023510208e-07, "loss": 0.4056, "step": 15638 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7598568199867716e-07, "loss": 0.3859, "step": 15639 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7561333457837283e-07, "loss": 0.3603, "step": 15640 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7524137798899942e-07, "loss": 0.4394, "step": 15641 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7486981224535403e-07, "loss": 0.4212, "step": 15642 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7449863736221928e-07, "loss": 0.431, "step": 15643 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.741278533543611e-07, "loss": 0.4304, "step": 15644 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7375746023652883e-07, "loss": 0.4402, "step": 15645 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7338745802345957e-07, "loss": 0.4345, "step": 15646 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7301784672987044e-07, "loss": 0.3794, "step": 15647 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7264862637046632e-07, "loss": 0.4068, "step": 15648 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7227979695993323e-07, "loss": 0.453, "step": 15649 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7191135851294726e-07, "loss": 0.5084, "step": 15650 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7154331104416332e-07, "loss": 0.491, "step": 15651 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7117565456822304e-07, "loss": 0.4003, "step": 15652 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.708083890997547e-07, "loss": 0.4553, "step": 15653 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7044151465336556e-07, "loss": 0.4007, "step": 15654 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.7007503124365165e-07, "loss": 0.3976, "step": 15655 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6970893888519357e-07, "loss": 0.4062, "step": 15656 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6934323759255412e-07, "loss": 0.3982, "step": 15657 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6897792738028163e-07, "loss": 0.4125, "step": 15658 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6861300826290784e-07, "loss": 0.412, "step": 15659 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.682484802549511e-07, "loss": 0.4555, "step": 15660 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6788434337091207e-07, "loss": 0.4644, "step": 15661 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6752059762527806e-07, "loss": 0.4045, "step": 15662 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6715724303251857e-07, "loss": 0.438, "step": 15663 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.667942796070876e-07, "loss": 0.494, "step": 15664 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6643170736342585e-07, "loss": 0.4002, "step": 15665 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6606952631595619e-07, "loss": 0.4854, "step": 15666 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6570773647908823e-07, "loss": 0.4349, "step": 15667 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6534633786721267e-07, "loss": 0.4171, "step": 15668 }, { "epoch": 2.83, "grad_norm": 0.0, "learning_rate": 1.6498533049470911e-07, "loss": 0.4049, "step": 15669 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6462471437593607e-07, "loss": 0.3306, "step": 15670 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.64264489525241e-07, "loss": 0.3774, "step": 15671 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6390465595695348e-07, "loss": 0.3968, "step": 15672 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.635452136853899e-07, "loss": 0.4013, "step": 15673 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6318616272484767e-07, "loss": 0.528, "step": 15674 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6282750308961204e-07, "loss": 0.4289, "step": 15675 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.624692347939505e-07, "loss": 0.5249, "step": 15676 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6211135785211495e-07, "loss": 0.3629, "step": 15677 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6175387227834293e-07, "loss": 0.4217, "step": 15678 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6139677808685528e-07, "loss": 0.4888, "step": 15679 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.610400752918595e-07, "loss": 0.3786, "step": 15680 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6068376390754315e-07, "loss": 0.3885, "step": 15681 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.6032784394808265e-07, "loss": 0.385, "step": 15682 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5997231542763668e-07, "loss": 0.4753, "step": 15683 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.596171783603506e-07, "loss": 0.4152, "step": 15684 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.592624327603498e-07, "loss": 0.4489, "step": 15685 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5890807864174628e-07, "loss": 0.4428, "step": 15686 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.585541160186388e-07, "loss": 0.4469, "step": 15687 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5820054490510827e-07, "loss": 0.3457, "step": 15688 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5784736531521905e-07, "loss": 0.442, "step": 15689 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.574945772630232e-07, "loss": 0.386, "step": 15690 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5714218076255394e-07, "loss": 0.3448, "step": 15691 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5679017582783118e-07, "loss": 0.4056, "step": 15692 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5643856247285706e-07, "loss": 0.4645, "step": 15693 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5608734071161925e-07, "loss": 0.3965, "step": 15694 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.55736510558091e-07, "loss": 0.39, "step": 15695 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5538607202622902e-07, "loss": 0.4953, "step": 15696 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5503602512997318e-07, "loss": 0.4543, "step": 15697 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5468636988324903e-07, "loss": 0.497, "step": 15698 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5433710629996878e-07, "loss": 0.3864, "step": 15699 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5398823439402466e-07, "loss": 0.3659, "step": 15700 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5363975417929445e-07, "loss": 0.4749, "step": 15701 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.532916656696437e-07, "loss": 0.4389, "step": 15702 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5294396887891806e-07, "loss": 0.393, "step": 15703 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5259666382095084e-07, "loss": 0.4111, "step": 15704 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.522497505095577e-07, "loss": 0.3847, "step": 15705 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.519032289585387e-07, "loss": 0.3945, "step": 15706 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.515570991816817e-07, "loss": 0.4287, "step": 15707 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5121136119275458e-07, "loss": 0.3475, "step": 15708 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5086601500550968e-07, "loss": 0.389, "step": 15709 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.505210606336882e-07, "loss": 0.4084, "step": 15710 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.5017649809101143e-07, "loss": 0.3977, "step": 15711 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4983232739118836e-07, "loss": 0.4013, "step": 15712 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4948854854790916e-07, "loss": 0.3821, "step": 15713 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.491451615748496e-07, "loss": 0.4369, "step": 15714 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4880216648567202e-07, "loss": 0.4139, "step": 15715 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4845956329402e-07, "loss": 0.4039, "step": 15716 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4811735201352262e-07, "loss": 0.3927, "step": 15717 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4777553265779455e-07, "loss": 0.3324, "step": 15718 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4743410524043378e-07, "loss": 0.4268, "step": 15719 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.470930697750217e-07, "loss": 0.4986, "step": 15720 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4675242627512742e-07, "loss": 0.4758, "step": 15721 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4641217475430126e-07, "loss": 0.4215, "step": 15722 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4607231522607901e-07, "loss": 0.4013, "step": 15723 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4573284770397988e-07, "loss": 0.45, "step": 15724 }, { "epoch": 2.84, "grad_norm": 0.0, "learning_rate": 1.4539377220150975e-07, "loss": 0.4455, "step": 15725 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4505508873215667e-07, "loss": 0.4199, "step": 15726 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4471679730939548e-07, "loss": 0.4779, "step": 15727 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4437889794668314e-07, "loss": 0.4682, "step": 15728 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4404139065746115e-07, "loss": 0.4518, "step": 15729 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4370427545515765e-07, "loss": 0.388, "step": 15730 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4336755235318188e-07, "loss": 0.3775, "step": 15731 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4303122136493098e-07, "loss": 0.3937, "step": 15732 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4269528250378416e-07, "loss": 0.3322, "step": 15733 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4235973578310526e-07, "loss": 0.4449, "step": 15734 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.420245812162435e-07, "loss": 0.4402, "step": 15735 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4168981881652943e-07, "loss": 0.3626, "step": 15736 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4135544859728567e-07, "loss": 0.4127, "step": 15737 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4102147057180938e-07, "loss": 0.4094, "step": 15738 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.406878847533888e-07, "loss": 0.3542, "step": 15739 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4035469115529333e-07, "loss": 0.4503, "step": 15740 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.4002188979077903e-07, "loss": 0.3785, "step": 15741 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.396894806730853e-07, "loss": 0.463, "step": 15742 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3935746381543603e-07, "loss": 0.4627, "step": 15743 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3902583923103842e-07, "loss": 0.4484, "step": 15744 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3869460693308635e-07, "loss": 0.4051, "step": 15745 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3836376693475483e-07, "loss": 0.4766, "step": 15746 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.380333192492078e-07, "loss": 0.3804, "step": 15747 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.377032638895892e-07, "loss": 0.4446, "step": 15748 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3737360086903074e-07, "loss": 0.4953, "step": 15749 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3704433020064522e-07, "loss": 0.3473, "step": 15750 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3671545189753333e-07, "loss": 0.4666, "step": 15751 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3638696597277678e-07, "loss": 0.448, "step": 15752 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3605887243944405e-07, "loss": 0.4193, "step": 15753 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.357311713105869e-07, "loss": 0.388, "step": 15754 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3540386259924266e-07, "loss": 0.4676, "step": 15755 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3507694631843203e-07, "loss": 0.4307, "step": 15756 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.347504224811591e-07, "loss": 0.4017, "step": 15757 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3442429110041455e-07, "loss": 0.3808, "step": 15758 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3409855218917356e-07, "loss": 0.4212, "step": 15759 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3377320576039354e-07, "loss": 0.4531, "step": 15760 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3344825182701637e-07, "loss": 0.4707, "step": 15761 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3312369040196947e-07, "loss": 0.446, "step": 15762 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3279952149816478e-07, "loss": 0.4639, "step": 15763 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.324757451284997e-07, "loss": 0.4767, "step": 15764 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3215236130585284e-07, "loss": 0.3954, "step": 15765 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.318293700430906e-07, "loss": 0.3994, "step": 15766 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3150677135306155e-07, "loss": 0.3956, "step": 15767 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3118456524859658e-07, "loss": 0.396, "step": 15768 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.308627517425176e-07, "loss": 0.4219, "step": 15769 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.3054133084762442e-07, "loss": 0.4114, "step": 15770 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.302203025767046e-07, "loss": 0.4227, "step": 15771 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2989966694252897e-07, "loss": 0.3632, "step": 15772 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2957942395785294e-07, "loss": 0.3411, "step": 15773 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2925957363541518e-07, "loss": 0.4426, "step": 15774 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2894011598794332e-07, "loss": 0.4019, "step": 15775 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2862105102814272e-07, "loss": 0.4658, "step": 15776 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2830237876870765e-07, "loss": 0.3891, "step": 15777 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.279840992223147e-07, "loss": 0.3967, "step": 15778 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2766621240162591e-07, "loss": 0.4552, "step": 15779 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 1.2734871831928673e-07, "loss": 0.3716, "step": 15780 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.270316169879293e-07, "loss": 0.4194, "step": 15781 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2671490842016798e-07, "loss": 0.4205, "step": 15782 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2639859262860154e-07, "loss": 0.4669, "step": 15783 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2608266962581218e-07, "loss": 0.4634, "step": 15784 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.257671394243698e-07, "loss": 0.4316, "step": 15785 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2545200203682662e-07, "loss": 0.4533, "step": 15786 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2513725747571813e-07, "loss": 0.4079, "step": 15787 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.248229057535666e-07, "loss": 0.372, "step": 15788 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.245089468828764e-07, "loss": 0.3649, "step": 15789 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.241953808761387e-07, "loss": 0.4574, "step": 15790 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.238822077458257e-07, "loss": 0.4024, "step": 15791 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.235694275043986e-07, "loss": 0.4529, "step": 15792 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2325704016429852e-07, "loss": 0.458, "step": 15793 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2294504573795329e-07, "loss": 0.4071, "step": 15794 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2263344423777412e-07, "loss": 0.3978, "step": 15795 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2232223567615663e-07, "loss": 0.4083, "step": 15796 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2201142006548317e-07, "loss": 0.4397, "step": 15797 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2170099741811714e-07, "loss": 0.3847, "step": 15798 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2139096774640868e-07, "loss": 0.4458, "step": 15799 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2108133106268905e-07, "loss": 0.4332, "step": 15800 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2077208737927837e-07, "loss": 0.3951, "step": 15801 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2046323670847794e-07, "loss": 0.4012, "step": 15802 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.2015477906257456e-07, "loss": 0.3489, "step": 15803 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1984671445383845e-07, "loss": 0.3661, "step": 15804 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1953904289452756e-07, "loss": 0.3995, "step": 15805 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1923176439687767e-07, "loss": 0.4232, "step": 15806 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1892487897311566e-07, "loss": 0.4415, "step": 15807 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1861838663544955e-07, "loss": 0.3379, "step": 15808 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1831228739607182e-07, "loss": 0.4504, "step": 15809 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1800658126715936e-07, "loss": 0.5023, "step": 15810 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1770126826087358e-07, "loss": 0.3421, "step": 15811 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1739634838936031e-07, "loss": 0.4386, "step": 15812 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1709182166475097e-07, "loss": 0.4015, "step": 15813 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1678768809915919e-07, "loss": 0.4641, "step": 15814 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1648394770468418e-07, "loss": 0.399, "step": 15815 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1618060049340852e-07, "loss": 0.3973, "step": 15816 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.158776464774003e-07, "loss": 0.4091, "step": 15817 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1557508566871323e-07, "loss": 0.4131, "step": 15818 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1527291807938101e-07, "loss": 0.3739, "step": 15819 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1497114372142625e-07, "loss": 0.4649, "step": 15820 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.146697626068527e-07, "loss": 0.3913, "step": 15821 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1436877474765074e-07, "loss": 0.5136, "step": 15822 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1406818015579413e-07, "loss": 0.3787, "step": 15823 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1376797884324109e-07, "loss": 0.4513, "step": 15824 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1346817082193318e-07, "loss": 0.4283, "step": 15825 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1316875610379862e-07, "loss": 0.4908, "step": 15826 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1286973470074791e-07, "loss": 0.3793, "step": 15827 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1257110662467597e-07, "loss": 0.4143, "step": 15828 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1227287188746438e-07, "loss": 0.403, "step": 15829 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1197503050097591e-07, "loss": 0.3822, "step": 15830 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1167758247706106e-07, "loss": 0.3849, "step": 15831 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1138052782755038e-07, "loss": 0.3672, "step": 15832 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1108386656426218e-07, "loss": 0.4193, "step": 15833 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1078759869899925e-07, "loss": 0.3779, "step": 15834 }, { "epoch": 2.86, "grad_norm": 0.0, "learning_rate": 1.1049172424354659e-07, "loss": 0.3948, "step": 15835 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.1019624320967592e-07, "loss": 0.5102, "step": 15836 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0990115560914006e-07, "loss": 0.4354, "step": 15837 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0960646145367737e-07, "loss": 0.4634, "step": 15838 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0931216075501516e-07, "loss": 0.4471, "step": 15839 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.090182535248574e-07, "loss": 0.3573, "step": 15840 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0872473977489806e-07, "loss": 0.3519, "step": 15841 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0843161951681336e-07, "loss": 0.3831, "step": 15842 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0813889276226397e-07, "loss": 0.4561, "step": 15843 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0784655952289614e-07, "loss": 0.4617, "step": 15844 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0755461981033721e-07, "loss": 0.4163, "step": 15845 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0726307363620237e-07, "loss": 0.3493, "step": 15846 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0697192101209009e-07, "loss": 0.4339, "step": 15847 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0668116194958222e-07, "loss": 0.4582, "step": 15848 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0639079646024508e-07, "loss": 0.3569, "step": 15849 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0610082455563165e-07, "loss": 0.4869, "step": 15850 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0581124624727712e-07, "loss": 0.4362, "step": 15851 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0552206154670008e-07, "loss": 0.3987, "step": 15852 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0523327046540577e-07, "loss": 0.3988, "step": 15853 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0494487301488276e-07, "loss": 0.4691, "step": 15854 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.04656869206603e-07, "loss": 0.48, "step": 15855 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0436925905202511e-07, "loss": 0.3858, "step": 15856 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0408204256259102e-07, "loss": 0.4632, "step": 15857 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0379521974972606e-07, "loss": 0.4018, "step": 15858 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0350879062483999e-07, "loss": 0.3478, "step": 15859 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.03222755199327e-07, "loss": 0.3766, "step": 15860 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0293711348456692e-07, "loss": 0.4635, "step": 15861 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0265186549192396e-07, "loss": 0.4024, "step": 15862 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0236701123274462e-07, "loss": 0.3877, "step": 15863 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0208255071836204e-07, "loss": 0.3638, "step": 15864 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0179848396009051e-07, "loss": 0.4353, "step": 15865 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.015148109692321e-07, "loss": 0.3946, "step": 15866 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0123153175707112e-07, "loss": 0.4206, "step": 15867 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0094864633487855e-07, "loss": 0.4229, "step": 15868 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.006661547139054e-07, "loss": 0.374, "step": 15869 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0038405690539266e-07, "loss": 0.4875, "step": 15870 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 1.0010235292055914e-07, "loss": 0.4093, "step": 15871 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.982104277061477e-08, "loss": 0.4516, "step": 15872 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.954012646674948e-08, "loss": 0.4928, "step": 15873 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.925960402013879e-08, "loss": 0.4214, "step": 15874 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.897947544194154e-08, "loss": 0.4157, "step": 15875 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.869974074330102e-08, "loss": 0.3118, "step": 15876 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.842039993534724e-08, "loss": 0.3925, "step": 15877 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.814145302919243e-08, "loss": 0.4578, "step": 15878 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.786290003593324e-08, "loss": 0.3631, "step": 15879 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.758474096665194e-08, "loss": 0.4146, "step": 15880 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.7306975832413e-08, "loss": 0.3827, "step": 15881 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.702960464426648e-08, "loss": 0.4536, "step": 15882 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.675262741324798e-08, "loss": 0.3364, "step": 15883 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.647604415037426e-08, "loss": 0.363, "step": 15884 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.619985486664985e-08, "loss": 0.371, "step": 15885 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.592405957306039e-08, "loss": 0.468, "step": 15886 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.564865828057935e-08, "loss": 0.4159, "step": 15887 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.537365100016128e-08, "loss": 0.4137, "step": 15888 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.509903774274743e-08, "loss": 0.3418, "step": 15889 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 9.482481851926128e-08, "loss": 0.367, "step": 15890 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.455099334061302e-08, "loss": 0.4002, "step": 15891 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.427756221769502e-08, "loss": 0.4416, "step": 15892 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.400452516138414e-08, "loss": 0.5109, "step": 15893 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.373188218254392e-08, "loss": 0.3818, "step": 15894 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.345963329202012e-08, "loss": 0.4724, "step": 15895 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.318777850064298e-08, "loss": 0.4408, "step": 15896 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.291631781922717e-08, "loss": 0.4195, "step": 15897 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.264525125857071e-08, "loss": 0.5055, "step": 15898 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.237457882945943e-08, "loss": 0.4586, "step": 15899 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.210430054266029e-08, "loss": 0.4172, "step": 15900 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.183441640892576e-08, "loss": 0.3574, "step": 15901 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.156492643899062e-08, "loss": 0.338, "step": 15902 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.129583064357738e-08, "loss": 0.4653, "step": 15903 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.102712903339084e-08, "loss": 0.3973, "step": 15904 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.075882161911909e-08, "loss": 0.4962, "step": 15905 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.049090841143805e-08, "loss": 0.4806, "step": 15906 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 9.022338942100361e-08, "loss": 0.3376, "step": 15907 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.995626465846063e-08, "loss": 0.421, "step": 15908 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.968953413443282e-08, "loss": 0.4383, "step": 15909 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.942319785953279e-08, "loss": 0.3807, "step": 15910 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.915725584435653e-08, "loss": 0.4371, "step": 15911 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.889170809948222e-08, "loss": 0.3936, "step": 15912 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.862655463547476e-08, "loss": 0.3877, "step": 15913 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.836179546288126e-08, "loss": 0.4109, "step": 15914 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.809743059223552e-08, "loss": 0.438, "step": 15915 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.783346003405469e-08, "loss": 0.386, "step": 15916 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.756988379883813e-08, "loss": 0.4077, "step": 15917 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.730670189707413e-08, "loss": 0.4427, "step": 15918 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.704391433922987e-08, "loss": 0.3893, "step": 15919 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.678152113576032e-08, "loss": 0.3928, "step": 15920 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.651952229710492e-08, "loss": 0.3319, "step": 15921 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.625791783368532e-08, "loss": 0.3495, "step": 15922 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.599670775590874e-08, "loss": 0.359, "step": 15923 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.573589207416688e-08, "loss": 0.3969, "step": 15924 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.547547079883477e-08, "loss": 0.4725, "step": 15925 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.521544394027414e-08, "loss": 0.4067, "step": 15926 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.495581150882782e-08, "loss": 0.3879, "step": 15927 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.469657351482418e-08, "loss": 0.4611, "step": 15928 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.443772996857613e-08, "loss": 0.411, "step": 15929 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.417928088038208e-08, "loss": 0.4737, "step": 15930 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.392122626052379e-08, "loss": 0.3728, "step": 15931 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.36635661192653e-08, "loss": 0.3992, "step": 15932 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.340630046685838e-08, "loss": 0.4123, "step": 15933 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.314942931353709e-08, "loss": 0.4869, "step": 15934 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.28929526695188e-08, "loss": 0.3636, "step": 15935 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.263687054500869e-08, "loss": 0.4219, "step": 15936 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.238118295019304e-08, "loss": 0.4936, "step": 15937 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.212588989524373e-08, "loss": 0.4054, "step": 15938 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.187099139031707e-08, "loss": 0.4514, "step": 15939 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.161648744555272e-08, "loss": 0.3748, "step": 15940 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.136237807107594e-08, "loss": 0.4785, "step": 15941 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.11086632769953e-08, "loss": 0.3798, "step": 15942 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.085534307340381e-08, "loss": 0.428, "step": 15943 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.060241747038011e-08, "loss": 0.4026, "step": 15944 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.034988647798392e-08, "loss": 0.4498, "step": 15945 }, { "epoch": 2.88, "grad_norm": 0.0, "learning_rate": 8.009775010626274e-08, "loss": 0.4, "step": 15946 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.984600836524637e-08, "loss": 0.5135, "step": 15947 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.959466126495118e-08, "loss": 0.395, "step": 15948 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.934370881537478e-08, "loss": 0.3699, "step": 15949 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.909315102650028e-08, "loss": 0.3954, "step": 15950 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.884298790829526e-08, "loss": 0.4029, "step": 15951 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.859321947071285e-08, "loss": 0.5042, "step": 15952 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.834384572368736e-08, "loss": 0.4014, "step": 15953 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.809486667714195e-08, "loss": 0.4219, "step": 15954 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.784628234097868e-08, "loss": 0.421, "step": 15955 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.759809272508856e-08, "loss": 0.4212, "step": 15956 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.735029783934367e-08, "loss": 0.4207, "step": 15957 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.71028976936028e-08, "loss": 0.4292, "step": 15958 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.685589229770806e-08, "loss": 0.4443, "step": 15959 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.660928166148496e-08, "loss": 0.5172, "step": 15960 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.636306579474451e-08, "loss": 0.3277, "step": 15961 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.611724470727999e-08, "loss": 0.3473, "step": 15962 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.587181840887358e-08, "loss": 0.3695, "step": 15963 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.562678690928637e-08, "loss": 0.4361, "step": 15964 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.538215021826723e-08, "loss": 0.4524, "step": 15965 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.513790834554835e-08, "loss": 0.3936, "step": 15966 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.489406130084532e-08, "loss": 0.4687, "step": 15967 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.465060909385924e-08, "loss": 0.427, "step": 15968 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.440755173427461e-08, "loss": 0.4704, "step": 15969 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.416488923176146e-08, "loss": 0.4433, "step": 15970 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.392262159597318e-08, "loss": 0.4264, "step": 15971 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.368074883654763e-08, "loss": 0.3807, "step": 15972 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.343927096310488e-08, "loss": 0.431, "step": 15973 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.319818798525502e-08, "loss": 0.4231, "step": 15974 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.295749991258595e-08, "loss": 0.404, "step": 15975 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.27172067546733e-08, "loss": 0.417, "step": 15976 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.247730852107727e-08, "loss": 0.3573, "step": 15977 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.223780522134016e-08, "loss": 0.4038, "step": 15978 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.199869686498994e-08, "loss": 0.4591, "step": 15979 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.175998346154012e-08, "loss": 0.448, "step": 15980 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.152166502048529e-08, "loss": 0.434, "step": 15981 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.12837415513079e-08, "loss": 0.3903, "step": 15982 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.104621306347148e-08, "loss": 0.4073, "step": 15983 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.080907956642624e-08, "loss": 0.4467, "step": 15984 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.057234106960464e-08, "loss": 0.4244, "step": 15985 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.033599758242693e-08, "loss": 0.3865, "step": 15986 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 7.010004911429335e-08, "loss": 0.4966, "step": 15987 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.986449567458975e-08, "loss": 0.3686, "step": 15988 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.962933727268861e-08, "loss": 0.3889, "step": 15989 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.939457391794246e-08, "loss": 0.4069, "step": 15990 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.916020561969384e-08, "loss": 0.3529, "step": 15991 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.892623238726415e-08, "loss": 0.4081, "step": 15992 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.869265422996262e-08, "loss": 0.4086, "step": 15993 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.845947115707963e-08, "loss": 0.4927, "step": 15994 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.822668317789327e-08, "loss": 0.463, "step": 15995 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.799429030166283e-08, "loss": 0.4798, "step": 15996 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.776229253763423e-08, "loss": 0.4107, "step": 15997 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.753068989503675e-08, "loss": 0.4655, "step": 15998 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.729948238308304e-08, "loss": 0.4345, "step": 15999 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.70686700109724e-08, "loss": 0.3789, "step": 16000 }, { "epoch": 2.89, "grad_norm": 0.0, "learning_rate": 6.683825278788525e-08, "loss": 0.4432, "step": 16001 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.660823072298983e-08, "loss": 0.3581, "step": 16002 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.637860382543548e-08, "loss": 0.3915, "step": 16003 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.614937210435712e-08, "loss": 0.4042, "step": 16004 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.592053556887523e-08, "loss": 0.4238, "step": 16005 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.569209422809141e-08, "loss": 0.4892, "step": 16006 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.546404809109508e-08, "loss": 0.4285, "step": 16007 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.523639716695673e-08, "loss": 0.4664, "step": 16008 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.500914146473469e-08, "loss": 0.4033, "step": 16009 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.47822809934695e-08, "loss": 0.3729, "step": 16010 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.455581576218395e-08, "loss": 0.4193, "step": 16011 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.43297457798886e-08, "loss": 0.4275, "step": 16012 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.410407105557625e-08, "loss": 0.4114, "step": 16013 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.38787915982253e-08, "loss": 0.4439, "step": 16014 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.365390741679855e-08, "loss": 0.44, "step": 16015 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.342941852023999e-08, "loss": 0.3685, "step": 16016 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.320532491748244e-08, "loss": 0.446, "step": 16017 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.298162661743878e-08, "loss": 0.4387, "step": 16018 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.275832362900969e-08, "loss": 0.4344, "step": 16019 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.253541596107804e-08, "loss": 0.4623, "step": 16020 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.231290362251009e-08, "loss": 0.5075, "step": 16021 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.209078662215984e-08, "loss": 0.3562, "step": 16022 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.186906496886358e-08, "loss": 0.4322, "step": 16023 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.16477386714398e-08, "loss": 0.4613, "step": 16024 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.14268077386948e-08, "loss": 0.3681, "step": 16025 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.120627217941711e-08, "loss": 0.4152, "step": 16026 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.098613200237969e-08, "loss": 0.4113, "step": 16027 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.07663872163411e-08, "loss": 0.3654, "step": 16028 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.054703783004101e-08, "loss": 0.4058, "step": 16029 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.032808385220801e-08, "loss": 0.378, "step": 16030 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 6.010952529155178e-08, "loss": 0.4679, "step": 16031 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.98913621567665e-08, "loss": 0.3796, "step": 16032 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.967359445653075e-08, "loss": 0.4353, "step": 16033 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.945622219950875e-08, "loss": 0.439, "step": 16034 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.923924539434689e-08, "loss": 0.3684, "step": 16035 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.9022664049677156e-08, "loss": 0.4334, "step": 16036 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.8806478174116e-08, "loss": 0.3828, "step": 16037 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.85906877762632e-08, "loss": 0.4419, "step": 16038 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.8375292864701894e-08, "loss": 0.4461, "step": 16039 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.8160293448005225e-08, "loss": 0.3969, "step": 16040 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.7945689534721906e-08, "loss": 0.4357, "step": 16041 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.773148113339066e-08, "loss": 0.4043, "step": 16042 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.7517668252533575e-08, "loss": 0.4368, "step": 16043 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.7304250900654945e-08, "loss": 0.43, "step": 16044 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.709122908624687e-08, "loss": 0.4545, "step": 16045 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.687860281778368e-08, "loss": 0.3136, "step": 16046 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.666637210372194e-08, "loss": 0.4731, "step": 16047 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.6454536952507134e-08, "loss": 0.3741, "step": 16048 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.6243097372563614e-08, "loss": 0.3869, "step": 16049 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.6032053372304665e-08, "loss": 0.4526, "step": 16050 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.582140496012578e-08, "loss": 0.3683, "step": 16051 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.561115214440693e-08, "loss": 0.3255, "step": 16052 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.5401294933511426e-08, "loss": 0.443, "step": 16053 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.5191833335789257e-08, "loss": 0.3882, "step": 16054 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.4982767359572645e-08, "loss": 0.4038, "step": 16055 }, { "epoch": 2.9, "grad_norm": 0.0, "learning_rate": 5.477409701317715e-08, "loss": 0.4562, "step": 16056 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.456582230490615e-08, "loss": 0.4256, "step": 16057 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.435794324304411e-08, "loss": 0.3571, "step": 16058 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.415045983585998e-08, "loss": 0.4253, "step": 16059 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.394337209160938e-08, "loss": 0.456, "step": 16060 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.373668001853016e-08, "loss": 0.3965, "step": 16061 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.353038362484464e-08, "loss": 0.5174, "step": 16062 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.332448291875958e-08, "loss": 0.3574, "step": 16063 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.3118977908466205e-08, "loss": 0.4465, "step": 16064 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.291386860214021e-08, "loss": 0.342, "step": 16065 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.27091550079395e-08, "loss": 0.4031, "step": 16066 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.2504837134010886e-08, "loss": 0.3773, "step": 16067 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.230091498848011e-08, "loss": 0.5053, "step": 16068 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.209738857946067e-08, "loss": 0.3734, "step": 16069 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.189425791504832e-08, "loss": 0.4537, "step": 16070 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.169152300332436e-08, "loss": 0.3977, "step": 16071 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.148918385235457e-08, "loss": 0.4957, "step": 16072 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.128724047018696e-08, "loss": 0.4616, "step": 16073 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.108569286485732e-08, "loss": 0.4218, "step": 16074 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.0884541044381454e-08, "loss": 0.3957, "step": 16075 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.0683785016761856e-08, "loss": 0.4746, "step": 16076 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.048342478998547e-08, "loss": 0.4632, "step": 16077 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.028346037202481e-08, "loss": 0.4345, "step": 16078 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 5.0083891770831285e-08, "loss": 0.4278, "step": 16079 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.9884718994345214e-08, "loss": 0.3016, "step": 16080 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.9685942050491376e-08, "loss": 0.3979, "step": 16081 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.948756094717566e-08, "loss": 0.4892, "step": 16082 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.928957569229176e-08, "loss": 0.4119, "step": 16083 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.9091986293715586e-08, "loss": 0.4232, "step": 16084 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.889479275930642e-08, "loss": 0.4743, "step": 16085 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.86979950969102e-08, "loss": 0.3942, "step": 16086 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.8501593314354e-08, "loss": 0.4027, "step": 16087 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.830558741945268e-08, "loss": 0.379, "step": 16088 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.810997742000334e-08, "loss": 0.3728, "step": 16089 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.791476332378864e-08, "loss": 0.3814, "step": 16090 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.7719945138572366e-08, "loss": 0.3559, "step": 16091 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.75255228721061e-08, "loss": 0.3917, "step": 16092 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.7331496532124767e-08, "loss": 0.3712, "step": 16093 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.7137866126345524e-08, "loss": 0.4022, "step": 16094 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.694463166247221e-08, "loss": 0.5049, "step": 16095 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.6751793148192005e-08, "loss": 0.3981, "step": 16096 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.655935059117655e-08, "loss": 0.3829, "step": 16097 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.636730399908196e-08, "loss": 0.5285, "step": 16098 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.6175653379546545e-08, "loss": 0.3905, "step": 16099 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.5984398740195336e-08, "loss": 0.3851, "step": 16100 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.579354008863779e-08, "loss": 0.4345, "step": 16101 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.560307743246562e-08, "loss": 0.4423, "step": 16102 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.541301077925497e-08, "loss": 0.461, "step": 16103 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.522334013656871e-08, "loss": 0.4047, "step": 16104 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.5034065511951884e-08, "loss": 0.418, "step": 16105 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.484518691293294e-08, "loss": 0.4436, "step": 16106 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.4656704347026956e-08, "loss": 0.4165, "step": 16107 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.4468617821731285e-08, "loss": 0.4402, "step": 16108 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.428092734452882e-08, "loss": 0.4712, "step": 16109 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.409363292288582e-08, "loss": 0.4618, "step": 16110 }, { "epoch": 2.91, "grad_norm": 0.0, "learning_rate": 4.3906734564254093e-08, "loss": 0.4093, "step": 16111 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.372023227606881e-08, "loss": 0.5073, "step": 16112 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.353412606574847e-08, "loss": 0.4117, "step": 16113 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.334841594069605e-08, "loss": 0.4513, "step": 16114 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.316310190830009e-08, "loss": 0.4017, "step": 16115 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.297818397593467e-08, "loss": 0.4579, "step": 16116 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.279366215095282e-08, "loss": 0.3987, "step": 16117 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.260953644069754e-08, "loss": 0.4108, "step": 16118 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.242580685249298e-08, "loss": 0.4345, "step": 16119 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.2242473393647733e-08, "loss": 0.3628, "step": 16120 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.205953607145485e-08, "loss": 0.5325, "step": 16121 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.1876994893194075e-08, "loss": 0.3759, "step": 16122 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.1694849866125155e-08, "loss": 0.4075, "step": 16123 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.1513100997493396e-08, "loss": 0.3999, "step": 16124 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.1331748294530794e-08, "loss": 0.3637, "step": 16125 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.115079176445269e-08, "loss": 0.3691, "step": 16126 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.097023141445555e-08, "loss": 0.3678, "step": 16127 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.079006725172363e-08, "loss": 0.4423, "step": 16128 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.0610299283423415e-08, "loss": 0.491, "step": 16129 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.0430927516706965e-08, "loss": 0.3733, "step": 16130 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.025195195870968e-08, "loss": 0.4545, "step": 16131 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 4.007337261655253e-08, "loss": 0.4718, "step": 16132 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.989518949733873e-08, "loss": 0.3594, "step": 16133 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.971740260815704e-08, "loss": 0.4464, "step": 16134 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.9540011956079594e-08, "loss": 0.3972, "step": 16135 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.9363017548162965e-08, "loss": 0.3997, "step": 16136 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.91864193914504e-08, "loss": 0.4197, "step": 16137 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.9010217492965184e-08, "loss": 0.4291, "step": 16138 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.883441185971837e-08, "loss": 0.3769, "step": 16139 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.865900249870214e-08, "loss": 0.3785, "step": 16140 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.848398941689535e-08, "loss": 0.4043, "step": 16141 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.830937262126133e-08, "loss": 0.4505, "step": 16142 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.8135152118745634e-08, "loss": 0.3622, "step": 16143 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.796132791627827e-08, "loss": 0.509, "step": 16144 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.778790002077593e-08, "loss": 0.4108, "step": 16145 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.761486843913753e-08, "loss": 0.4404, "step": 16146 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.744223317824647e-08, "loss": 0.4661, "step": 16147 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.7269994244969466e-08, "loss": 0.3868, "step": 16148 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.7098151646158817e-08, "loss": 0.4293, "step": 16149 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.6926705388651286e-08, "loss": 0.4119, "step": 16150 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.675565547926807e-08, "loss": 0.432, "step": 16151 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.658500192481151e-08, "loss": 0.351, "step": 16152 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.641474473207285e-08, "loss": 0.4232, "step": 16153 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.624488390782333e-08, "loss": 0.3913, "step": 16154 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.607541945882198e-08, "loss": 0.3811, "step": 16155 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.590635139180898e-08, "loss": 0.4189, "step": 16156 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.5737679713511166e-08, "loss": 0.4486, "step": 16157 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.556940443063761e-08, "loss": 0.3824, "step": 16158 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.5401525549884074e-08, "loss": 0.4777, "step": 16159 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.5234043077927435e-08, "loss": 0.515, "step": 16160 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.506695702143237e-08, "loss": 0.3965, "step": 16161 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.490026738704355e-08, "loss": 0.3839, "step": 16162 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.473397418139346e-08, "loss": 0.382, "step": 16163 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.4568077411096804e-08, "loss": 0.4052, "step": 16164 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.440257708275496e-08, "loss": 0.4664, "step": 16165 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.4237473202949345e-08, "loss": 0.4478, "step": 16166 }, { "epoch": 2.92, "grad_norm": 0.0, "learning_rate": 3.407276577825025e-08, "loss": 0.4456, "step": 16167 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.3908454815207994e-08, "loss": 0.4885, "step": 16168 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.3744540320361785e-08, "loss": 0.3559, "step": 16169 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.358102230022975e-08, "loss": 0.373, "step": 16170 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.34179007613189e-08, "loss": 0.3588, "step": 16171 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.3255175710117385e-08, "loss": 0.4476, "step": 16172 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.309284715309891e-08, "loss": 0.3784, "step": 16173 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.293091509672053e-08, "loss": 0.3308, "step": 16174 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.276937954742487e-08, "loss": 0.4251, "step": 16175 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.260824051163902e-08, "loss": 0.406, "step": 16176 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.244749799577229e-08, "loss": 0.476, "step": 16177 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.228715200621957e-08, "loss": 0.4247, "step": 16178 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.2127202549359085e-08, "loss": 0.4497, "step": 16179 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.196764963155463e-08, "loss": 0.3711, "step": 16180 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.180849325915336e-08, "loss": 0.3746, "step": 16181 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.1649733438486874e-08, "loss": 0.4122, "step": 16182 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.149137017587012e-08, "loss": 0.3689, "step": 16183 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.133340347760361e-08, "loss": 0.4123, "step": 16184 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.117583334997232e-08, "loss": 0.4313, "step": 16185 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.1018659799243456e-08, "loss": 0.3756, "step": 16186 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.086188283167091e-08, "loss": 0.435, "step": 16187 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.070550245348969e-08, "loss": 0.4388, "step": 16188 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.054951867092259e-08, "loss": 0.439, "step": 16189 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.0393931490173555e-08, "loss": 0.3634, "step": 16190 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.023874091743317e-08, "loss": 0.442, "step": 16191 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 3.00839469588754e-08, "loss": 0.361, "step": 16192 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.992954962065642e-08, "loss": 0.405, "step": 16193 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.9775548908920205e-08, "loss": 0.3962, "step": 16194 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.962194482979297e-08, "loss": 0.3752, "step": 16195 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.946873738938427e-08, "loss": 0.4558, "step": 16196 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.9315926593790344e-08, "loss": 0.4815, "step": 16197 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.9163512449089658e-08, "loss": 0.3802, "step": 16198 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.9011494961345145e-08, "loss": 0.3851, "step": 16199 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.885987413660418e-08, "loss": 0.4324, "step": 16200 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.8708649980899728e-08, "loss": 0.3472, "step": 16201 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.8557822500245856e-08, "loss": 0.413, "step": 16202 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.8407391700645547e-08, "loss": 0.3923, "step": 16203 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.825735758807957e-08, "loss": 0.5037, "step": 16204 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.8107720168519815e-08, "loss": 0.3465, "step": 16205 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.7958479447917076e-08, "loss": 0.4053, "step": 16206 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.7809635432209935e-08, "loss": 0.4348, "step": 16207 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.7661188127318107e-08, "loss": 0.4389, "step": 16208 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.7513137539147972e-08, "loss": 0.3963, "step": 16209 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.7365483673588157e-08, "loss": 0.3487, "step": 16210 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.721822653651396e-08, "loss": 0.4734, "step": 16211 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.7071366133782917e-08, "loss": 0.3903, "step": 16212 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.6924902471238134e-08, "loss": 0.3985, "step": 16213 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.6778835554706063e-08, "loss": 0.4051, "step": 16214 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.6633165389996495e-08, "loss": 0.4599, "step": 16215 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.64878919829048e-08, "loss": 0.4679, "step": 16216 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.6343015339209687e-08, "loss": 0.4019, "step": 16217 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.6198535464675435e-08, "loss": 0.4518, "step": 16218 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.6054452365049665e-08, "loss": 0.4136, "step": 16219 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.5910766046064462e-08, "loss": 0.3718, "step": 16220 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.5767476513435253e-08, "loss": 0.4898, "step": 16221 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 2.5624583772863032e-08, "loss": 0.4776, "step": 16222 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.548208783003103e-08, "loss": 0.4379, "step": 16223 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.5339988690610274e-08, "loss": 0.4376, "step": 16224 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.519828636025068e-08, "loss": 0.3404, "step": 16225 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.5056980844592183e-08, "loss": 0.4612, "step": 16226 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.4916072149254732e-08, "loss": 0.3706, "step": 16227 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.4775560279843845e-08, "loss": 0.4003, "step": 16228 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.4635445241950605e-08, "loss": 0.3726, "step": 16229 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.449572704114722e-08, "loss": 0.3374, "step": 16230 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.4356405682993688e-08, "loss": 0.3749, "step": 16231 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.4217481173030023e-08, "loss": 0.4204, "step": 16232 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.407895351678513e-08, "loss": 0.4304, "step": 16233 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.3940822719770163e-08, "loss": 0.3644, "step": 16234 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.38030887874785e-08, "loss": 0.4116, "step": 16235 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.3665751725389098e-08, "loss": 0.3801, "step": 16236 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.3528811538968687e-08, "loss": 0.3755, "step": 16237 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.3392268233660697e-08, "loss": 0.4131, "step": 16238 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.325612181489967e-08, "loss": 0.3689, "step": 16239 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.3120372288102376e-08, "loss": 0.4447, "step": 16240 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.298501965866673e-08, "loss": 0.3536, "step": 16241 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.2850063931979525e-08, "loss": 0.4013, "step": 16242 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.2715505113407588e-08, "loss": 0.3903, "step": 16243 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.2581343208305518e-08, "loss": 0.4157, "step": 16244 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.244757822201016e-08, "loss": 0.4777, "step": 16245 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.23142101598417e-08, "loss": 0.3786, "step": 16246 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.2181239027108114e-08, "loss": 0.3653, "step": 16247 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.204866482909629e-08, "loss": 0.4023, "step": 16248 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.1916487571082e-08, "loss": 0.4561, "step": 16249 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.1784707258324378e-08, "loss": 0.4716, "step": 16250 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.165332389606367e-08, "loss": 0.4212, "step": 16251 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.1522337489527922e-08, "loss": 0.52, "step": 16252 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.1391748043927406e-08, "loss": 0.4062, "step": 16253 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.126155556445797e-08, "loss": 0.435, "step": 16254 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.113176005629769e-08, "loss": 0.443, "step": 16255 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.1002361524611326e-08, "loss": 0.4245, "step": 16256 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.087335997454587e-08, "loss": 0.4327, "step": 16257 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.0744755411233887e-08, "loss": 0.3903, "step": 16258 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.061654783979017e-08, "loss": 0.4034, "step": 16259 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.0488737265316194e-08, "loss": 0.4032, "step": 16260 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.0361323692895675e-08, "loss": 0.4707, "step": 16261 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.0234307127597887e-08, "loss": 0.5562, "step": 16262 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 2.0107687574475453e-08, "loss": 0.4224, "step": 16263 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.9981465038566573e-08, "loss": 0.4565, "step": 16264 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.9855639524890558e-08, "loss": 0.3845, "step": 16265 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.9730211038454518e-08, "loss": 0.3412, "step": 16266 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.96051795842489e-08, "loss": 0.5355, "step": 16267 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.948054516724529e-08, "loss": 0.4385, "step": 16268 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.935630779240416e-08, "loss": 0.4621, "step": 16269 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.9232467464666006e-08, "loss": 0.4132, "step": 16270 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.91090241889591e-08, "loss": 0.3783, "step": 16271 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.898597797019286e-08, "loss": 0.3684, "step": 16272 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.886332881326225e-08, "loss": 0.5123, "step": 16273 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.8741076723047815e-08, "loss": 0.429, "step": 16274 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.8619221704413438e-08, "loss": 0.344, "step": 16275 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.8497763762203026e-08, "loss": 0.3786, "step": 16276 }, { "epoch": 2.94, "grad_norm": 0.0, "learning_rate": 1.8376702901252708e-08, "loss": 0.3994, "step": 16277 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.82560391263753e-08, "loss": 0.4577, "step": 16278 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.8135772442373635e-08, "loss": 0.3992, "step": 16279 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.8015902854029432e-08, "loss": 0.4473, "step": 16280 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.7896430366114435e-08, "loss": 0.4483, "step": 16281 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.777735498337818e-08, "loss": 0.4096, "step": 16282 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.765867671056021e-08, "loss": 0.4272, "step": 16283 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.7540395552380073e-08, "loss": 0.4659, "step": 16284 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.7422511513544015e-08, "loss": 0.3922, "step": 16285 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.7305024598741616e-08, "loss": 0.4095, "step": 16286 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.718793481264691e-08, "loss": 0.3939, "step": 16287 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.7071242159917288e-08, "loss": 0.3874, "step": 16288 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.69549466451957e-08, "loss": 0.4458, "step": 16289 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.683904827310845e-08, "loss": 0.4275, "step": 16290 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.6723547048265177e-08, "loss": 0.3887, "step": 16291 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.6608442975262205e-08, "loss": 0.4023, "step": 16292 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.649373605867699e-08, "loss": 0.4029, "step": 16293 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.6379426303074765e-08, "loss": 0.4198, "step": 16294 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.626551371300078e-08, "loss": 0.4049, "step": 16295 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.615199829298919e-08, "loss": 0.4356, "step": 16296 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.6038880047553052e-08, "loss": 0.3673, "step": 16297 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5926158981194318e-08, "loss": 0.3434, "step": 16298 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5813835098396068e-08, "loss": 0.4563, "step": 16299 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5701908403628065e-08, "loss": 0.4297, "step": 16300 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5590378901342295e-08, "loss": 0.355, "step": 16301 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5479246595975215e-08, "loss": 0.4195, "step": 16302 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5368511491947736e-08, "loss": 0.4129, "step": 16303 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5258173593665215e-08, "loss": 0.384, "step": 16304 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.514823290551859e-08, "loss": 0.5606, "step": 16305 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.5038689431879915e-08, "loss": 0.3848, "step": 16306 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.492954317710682e-08, "loss": 0.431, "step": 16307 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.4820794145542495e-08, "loss": 0.3601, "step": 16308 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.4712442341511257e-08, "loss": 0.4245, "step": 16309 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.4604487769326326e-08, "loss": 0.3266, "step": 16310 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.449693043327982e-08, "loss": 0.411, "step": 16311 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.4389770337651653e-08, "loss": 0.3714, "step": 16312 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.428300748670508e-08, "loss": 0.4615, "step": 16313 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.4176641884686703e-08, "loss": 0.413, "step": 16314 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.4070673535828694e-08, "loss": 0.4256, "step": 16315 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3965102444345458e-08, "loss": 0.4182, "step": 16316 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3859928614438079e-08, "loss": 0.4127, "step": 16317 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3755152050289877e-08, "loss": 0.426, "step": 16318 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3650772756068631e-08, "loss": 0.4358, "step": 16319 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3546790735927683e-08, "loss": 0.3649, "step": 16320 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3443205994002617e-08, "loss": 0.3905, "step": 16321 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3340018534414578e-08, "loss": 0.4947, "step": 16322 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.3237228361269172e-08, "loss": 0.4793, "step": 16323 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.313483547865535e-08, "loss": 0.5044, "step": 16324 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.303283989064541e-08, "loss": 0.3343, "step": 16325 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.2931241601298327e-08, "loss": 0.4313, "step": 16326 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.2830040614655314e-08, "loss": 0.4478, "step": 16327 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.272923693474204e-08, "loss": 0.4434, "step": 16328 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.262883056556974e-08, "loss": 0.4003, "step": 16329 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.2528821511130773e-08, "loss": 0.3922, "step": 16330 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.2429209775405294e-08, "loss": 0.3981, "step": 16331 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.2329995362354574e-08, "loss": 0.3656, "step": 16332 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 1.223117827592768e-08, "loss": 0.3596, "step": 16333 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.213275852005369e-08, "loss": 0.4031, "step": 16334 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.2034736098649469e-08, "loss": 0.3427, "step": 16335 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1937111015613013e-08, "loss": 0.4933, "step": 16336 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.183988327482899e-08, "loss": 0.4341, "step": 16337 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1743052880165418e-08, "loss": 0.3428, "step": 16338 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.164661983547366e-08, "loss": 0.3585, "step": 16339 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1550584144590648e-08, "loss": 0.4348, "step": 16340 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1454945811336659e-08, "loss": 0.4413, "step": 16341 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1359704839515317e-08, "loss": 0.479, "step": 16342 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1264861232918034e-08, "loss": 0.4321, "step": 16343 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.1170414995315127e-08, "loss": 0.4393, "step": 16344 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.10763661304647e-08, "loss": 0.4664, "step": 16345 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0982714642109316e-08, "loss": 0.414, "step": 16346 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0889460533972662e-08, "loss": 0.3873, "step": 16347 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0796603809766216e-08, "loss": 0.4306, "step": 16348 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0704144473183687e-08, "loss": 0.3621, "step": 16349 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0612082527902135e-08, "loss": 0.369, "step": 16350 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0520417977585295e-08, "loss": 0.3464, "step": 16351 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0429150825878032e-08, "loss": 0.4695, "step": 16352 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0338281076414103e-08, "loss": 0.4193, "step": 16353 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.0247808732805065e-08, "loss": 0.3807, "step": 16354 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.015773379865248e-08, "loss": 0.3815, "step": 16355 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 1.006805627753793e-08, "loss": 0.3694, "step": 16356 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.97877617303078e-09, "loss": 0.4039, "step": 16357 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.889893488680413e-09, "loss": 0.4116, "step": 16358 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.801408228025111e-09, "loss": 0.4423, "step": 16359 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.71332039458428e-09, "loss": 0.3747, "step": 16360 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.625629991860674e-09, "loss": 0.3544, "step": 16361 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.538337023344834e-09, "loss": 0.4461, "step": 16362 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.451441492508429e-09, "loss": 0.2992, "step": 16363 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.36494340280869e-09, "loss": 0.3869, "step": 16364 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.278842757686202e-09, "loss": 0.4473, "step": 16365 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.193139560566e-09, "loss": 0.3861, "step": 16366 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.107833814858691e-09, "loss": 0.2757, "step": 16367 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 9.022925523956006e-09, "loss": 0.4033, "step": 16368 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.938414691237463e-09, "loss": 0.4223, "step": 16369 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.854301320064817e-09, "loss": 0.4458, "step": 16370 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.770585413783173e-09, "loss": 0.4622, "step": 16371 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.687266975723196e-09, "loss": 0.4329, "step": 16372 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.604346009201125e-09, "loss": 0.4566, "step": 16373 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.521822517513212e-09, "loss": 0.4841, "step": 16374 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.439696503944605e-09, "loss": 0.4109, "step": 16375 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.357967971760472e-09, "loss": 0.5028, "step": 16376 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.276636924213766e-09, "loss": 0.4383, "step": 16377 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.195703364538566e-09, "loss": 0.3996, "step": 16378 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.115167295955628e-09, "loss": 0.4218, "step": 16379 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 8.035028721667948e-09, "loss": 0.4086, "step": 16380 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.955287644864084e-09, "loss": 0.3664, "step": 16381 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.875944068715946e-09, "loss": 0.4127, "step": 16382 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.796997996381007e-09, "loss": 0.4188, "step": 16383 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.718449430998975e-09, "loss": 0.3998, "step": 16384 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.640298375694022e-09, "loss": 0.3912, "step": 16385 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.56254483357588e-09, "loss": 0.4176, "step": 16386 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.48518880773874e-09, "loss": 0.5123, "step": 16387 }, { "epoch": 2.96, "grad_norm": 0.0, "learning_rate": 7.408230301257924e-09, "loss": 0.4383, "step": 16388 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 7.331669317196533e-09, "loss": 0.4176, "step": 16389 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 7.255505858599909e-09, "loss": 0.4253, "step": 16390 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 7.179739928496743e-09, "loss": 0.5063, "step": 16391 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 7.1043715299035085e-09, "loss": 0.3759, "step": 16392 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 7.029400665815589e-09, "loss": 0.3883, "step": 16393 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.954827339218373e-09, "loss": 0.3853, "step": 16394 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.880651553076157e-09, "loss": 0.3569, "step": 16395 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.806873310342133e-09, "loss": 0.3761, "step": 16396 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.7334926139484006e-09, "loss": 0.4087, "step": 16397 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.660509466817067e-09, "loss": 0.3835, "step": 16398 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.587923871849144e-09, "loss": 0.3937, "step": 16399 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.5157358319345446e-09, "loss": 0.3839, "step": 16400 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.443945349942082e-09, "loss": 0.38, "step": 16401 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.372552428730583e-09, "loss": 0.4325, "step": 16402 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.301557071138886e-09, "loss": 0.4069, "step": 16403 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.2309592799914e-09, "loss": 0.4804, "step": 16404 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.160759058095878e-09, "loss": 0.2933, "step": 16405 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.090956408246751e-09, "loss": 0.4769, "step": 16406 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 6.021551333219578e-09, "loss": 0.3864, "step": 16407 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.952543835775482e-09, "loss": 0.426, "step": 16408 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.883933918660045e-09, "loss": 0.3932, "step": 16409 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.815721584602196e-09, "loss": 0.3785, "step": 16410 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.7479068363164306e-09, "loss": 0.3936, "step": 16411 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.680489676500589e-09, "loss": 0.4395, "step": 16412 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.613470107834751e-09, "loss": 0.4424, "step": 16413 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.546848132987892e-09, "loss": 0.4033, "step": 16414 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.480623754607894e-09, "loss": 0.3332, "step": 16415 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.4147969753304275e-09, "loss": 0.3828, "step": 16416 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.349367797773397e-09, "loss": 0.4843, "step": 16417 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.284336224541387e-09, "loss": 0.3635, "step": 16418 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.219702258220105e-09, "loss": 0.4053, "step": 16419 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.155465901380829e-09, "loss": 0.3996, "step": 16420 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.091627156579293e-09, "loss": 0.4914, "step": 16421 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 5.028186026354576e-09, "loss": 0.3781, "step": 16422 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.965142513231325e-09, "loss": 0.454, "step": 16423 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.902496619717534e-09, "loss": 0.3663, "step": 16424 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.840248348304544e-09, "loss": 0.3617, "step": 16425 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.7783977014692614e-09, "loss": 0.4404, "step": 16426 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.716944681671942e-09, "loss": 0.4348, "step": 16427 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.655889291357296e-09, "loss": 0.433, "step": 16428 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.595231532955602e-09, "loss": 0.4164, "step": 16429 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.534971408877153e-09, "loss": 0.3503, "step": 16430 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.475108921521143e-09, "loss": 0.4109, "step": 16431 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.415644073268999e-09, "loss": 0.3636, "step": 16432 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.356576866485496e-09, "loss": 0.3489, "step": 16433 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.297907303520976e-09, "loss": 0.4733, "step": 16434 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.239635386709129e-09, "loss": 0.5048, "step": 16435 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.1817611183692096e-09, "loss": 0.4925, "step": 16436 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.124284500801601e-09, "loss": 0.4335, "step": 16437 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.067205536294472e-09, "loss": 0.4337, "step": 16438 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 4.010524227117119e-09, "loss": 0.3814, "step": 16439 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 3.954240575525514e-09, "loss": 0.4287, "step": 16440 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 3.898354583758979e-09, "loss": 0.4167, "step": 16441 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 3.842866254039068e-09, "loss": 0.3725, "step": 16442 }, { "epoch": 2.97, "grad_norm": 0.0, "learning_rate": 3.787775588575126e-09, "loss": 0.439, "step": 16443 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.733082589557624e-09, "loss": 0.4335, "step": 16444 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.678787259162597e-09, "loss": 0.4101, "step": 16445 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.624889599550541e-09, "loss": 0.4622, "step": 16446 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.5713896128641846e-09, "loss": 0.5013, "step": 16447 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.518287301234047e-09, "loss": 0.456, "step": 16448 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.4655826667706617e-09, "loss": 0.3998, "step": 16449 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.41327571157124e-09, "loss": 0.4141, "step": 16450 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.3613664377174503e-09, "loss": 0.4289, "step": 16451 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.3098548472731975e-09, "loss": 0.3532, "step": 16452 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.2587409422879525e-09, "loss": 0.4274, "step": 16453 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.2080247247956443e-09, "loss": 0.4333, "step": 16454 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.157706196813548e-09, "loss": 0.4499, "step": 16455 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.1077853603433963e-09, "loss": 0.4266, "step": 16456 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.0582622173713773e-09, "loss": 0.3835, "step": 16457 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 3.009136769867027e-09, "loss": 0.414, "step": 16458 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.9604090197854487e-09, "loss": 0.4264, "step": 16459 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.912078969065091e-09, "loss": 0.3944, "step": 16460 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.8641466196266397e-09, "loss": 0.4071, "step": 16461 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.8166119733796794e-09, "loss": 0.4427, "step": 16462 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.7694750322138085e-09, "loss": 0.3403, "step": 16463 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.7227357980041947e-09, "loss": 0.4032, "step": 16464 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.6763942726104607e-09, "loss": 0.4256, "step": 16465 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.6304504578766875e-09, "loss": 0.4018, "step": 16466 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.5849043556303022e-09, "loss": 0.3779, "step": 16467 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.539755967682078e-09, "loss": 0.4031, "step": 16468 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.4950052958283567e-09, "loss": 0.3937, "step": 16469 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.4506523418510454e-09, "loss": 0.417, "step": 16470 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.4066971075131784e-09, "loss": 0.3845, "step": 16471 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.3631395945644676e-09, "loss": 0.3944, "step": 16472 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.31997980473575e-09, "loss": 0.3892, "step": 16473 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.277217739746762e-09, "loss": 0.4834, "step": 16474 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.2348534012961443e-09, "loss": 0.4158, "step": 16475 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.192886791069215e-09, "loss": 0.4027, "step": 16476 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.1513179107379712e-09, "loss": 0.4342, "step": 16477 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.1101467619544237e-09, "loss": 0.4033, "step": 16478 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.0693733463561516e-09, "loss": 0.412, "step": 16479 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 2.028997665565191e-09, "loss": 0.3627, "step": 16480 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.9890197211880347e-09, "loss": 0.3965, "step": 16481 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.949439514816742e-09, "loss": 0.4086, "step": 16482 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.9102570480233893e-09, "loss": 0.418, "step": 16483 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.87147232236784e-09, "loss": 0.4218, "step": 16484 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.8330853393921933e-09, "loss": 0.4124, "step": 16485 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.7950961006252265e-09, "loss": 0.4186, "step": 16486 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.757504607576843e-09, "loss": 0.4229, "step": 16487 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.7203108617425136e-09, "loss": 0.4595, "step": 16488 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.6835148646021648e-09, "loss": 0.368, "step": 16489 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.6471166176201814e-09, "loss": 0.4691, "step": 16490 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.6111161222431837e-09, "loss": 0.4377, "step": 16491 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.57551337990558e-09, "loss": 0.3723, "step": 16492 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.540308392020684e-09, "loss": 0.3532, "step": 16493 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.505501159991818e-09, "loss": 0.4316, "step": 16494 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.4710916852023194e-09, "loss": 0.4256, "step": 16495 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.4370799690210934e-09, "loss": 0.4158, "step": 16496 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.403466012801502e-09, "loss": 0.375, "step": 16497 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 1.3702498178802536e-09, "loss": 0.4692, "step": 16498 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.3374313855785136e-09, "loss": 0.3876, "step": 16499 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.3050107172030146e-09, "loss": 0.4178, "step": 16500 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.2729878140438357e-09, "loss": 0.4561, "step": 16501 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.2413626773732924e-09, "loss": 0.418, "step": 16502 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.2101353084492673e-09, "loss": 0.3817, "step": 16503 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.1793057085152104e-09, "loss": 0.497, "step": 16504 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.148873878797918e-09, "loss": 0.4242, "step": 16505 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.1188398205075335e-09, "loss": 0.3683, "step": 16506 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.0892035348386564e-09, "loss": 0.4045, "step": 16507 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.059965022970344e-09, "loss": 0.3568, "step": 16508 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.0311242860649994e-09, "loss": 0.3167, "step": 16509 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.0026813252717037e-09, "loss": 0.5143, "step": 16510 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 9.746361417195538e-10, "loss": 0.397, "step": 16511 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 9.469887365276542e-10, "loss": 0.3424, "step": 16512 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 9.197391107917952e-10, "loss": 0.4191, "step": 16513 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 8.928872655999954e-10, "loss": 0.4712, "step": 16514 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 8.664332020169586e-10, "loss": 0.4389, "step": 16515 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 8.403769210973966e-10, "loss": 0.4748, "step": 16516 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 8.147184238771477e-10, "loss": 0.4526, "step": 16517 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 7.894577113776169e-10, "loss": 0.39, "step": 16518 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 7.64594784602446e-10, "loss": 0.3599, "step": 16519 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 7.401296445408435e-10, "loss": 0.4861, "step": 16520 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 7.160622921675853e-10, "loss": 0.3831, "step": 16521 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 6.923927284374631e-10, "loss": 0.4235, "step": 16522 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 6.69120954295277e-10, "loss": 0.4293, "step": 16523 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 6.462469706647323e-10, "loss": 0.3971, "step": 16524 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 6.237707784562119e-10, "loss": 0.4725, "step": 16525 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 6.016923785645556e-10, "loss": 0.4305, "step": 16526 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 5.800117718668397e-10, "loss": 0.4634, "step": 16527 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 5.587289592268174e-10, "loss": 0.4066, "step": 16528 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 5.378439414893688e-10, "loss": 0.3565, "step": 16529 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 5.17356719487161e-10, "loss": 0.3418, "step": 16530 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 4.972672940350976e-10, "loss": 0.4148, "step": 16531 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 4.77575665931429e-10, "loss": 0.4879, "step": 16532 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 4.582818359599728e-10, "loss": 0.4425, "step": 16533 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 4.3938580488789293e-10, "loss": 0.3804, "step": 16534 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 4.2088757346792076e-10, "loss": 0.4609, "step": 16535 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 4.0278714243502383e-10, "loss": 0.4189, "step": 16536 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.8508451250973687e-10, "loss": 0.3931, "step": 16537 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.6777968439594137e-10, "loss": 0.4129, "step": 16538 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.508726587819755e-10, "loss": 0.4649, "step": 16539 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.343634363406345e-10, "loss": 0.47, "step": 16540 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.1825201772806016e-10, "loss": 0.382, "step": 16541 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 3.0253840358707155e-10, "loss": 0.3557, "step": 16542 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.8722259454050385e-10, "loss": 0.395, "step": 16543 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.723045911989797e-10, "loss": 0.378, "step": 16544 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.577843941564684e-10, "loss": 0.3851, "step": 16545 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.4366200398917573e-10, "loss": 0.5387, "step": 16546 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.2993742125887454e-10, "loss": 0.4322, "step": 16547 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.1661064651290475e-10, "loss": 0.435, "step": 16548 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 2.0368168028084279e-10, "loss": 0.3814, "step": 16549 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.9115052307672189e-10, "loss": 0.4236, "step": 16550 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.7901717539903218e-10, "loss": 0.3842, "step": 16551 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.672816377307207e-10, "loss": 0.3977, "step": 16552 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.5594391053919133e-10, "loss": 0.4224, "step": 16553 }, { "epoch": 2.99, "grad_norm": 0.0, "learning_rate": 1.450039942740844e-10, "loss": 0.4214, "step": 16554 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.3446188937282777e-10, "loss": 0.4063, "step": 16555 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.243175962517551e-10, "loss": 0.3548, "step": 16556 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.14571115317208e-10, "loss": 0.4061, "step": 16557 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.0522244695554407e-10, "loss": 0.3301, "step": 16558 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 9.627159153868804e-11, "loss": 0.3327, "step": 16559 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 8.771854942302149e-11, "loss": 0.3412, "step": 16560 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 7.956332094827268e-11, "loss": 0.505, "step": 16561 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 7.180590643973695e-11, "loss": 0.4025, "step": 16562 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 6.444630620494608e-11, "loss": 0.4196, "step": 16563 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 5.7484520538109155e-11, "loss": 0.4267, "step": 16564 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 5.092054971567173e-11, "loss": 0.3774, "step": 16565 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 4.475439399742598e-11, "loss": 0.5262, "step": 16566 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 3.898605363095165e-11, "loss": 0.4141, "step": 16567 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 3.3615528842734225e-11, "loss": 0.3922, "step": 16568 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 2.86428198492672e-11, "loss": 0.3298, "step": 16569 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 2.4067926848170274e-11, "loss": 0.3776, "step": 16570 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.989085001929958e-11, "loss": 0.3959, "step": 16571 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.611158953029879e-11, "loss": 0.4083, "step": 16572 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.2730145532158234e-11, "loss": 0.341, "step": 16573 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 9.746518156994455e-12, "loss": 0.3788, "step": 16574 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 7.160707525821764e-12, "loss": 0.4379, "step": 16575 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 4.9727137407806765e-12, "loss": 0.3499, "step": 16576 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 3.1825368895788132e-12, "loss": 0.4682, "step": 16577 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.7901770421602238e-12, "loss": 0.5102, "step": 16578 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 7.956342540360595e-13, "loss": 0.3814, "step": 16579 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 1.9890856517434943e-13, "loss": 0.3972, "step": 16580 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.363, "step": 16581 } ], "logging_steps": 1.0, "max_steps": 16581, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 8.638370608293347e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }