diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,37242 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 6202, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.3475935828877e-06, + "loss": 5.9486, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.06951871657754e-05, + "loss": 5.8951, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.6042780748663105e-05, + "loss": 5.9756, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.13903743315508e-05, + "loss": 5.7782, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.67379679144385e-05, + "loss": 5.959, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 3.208556149732621e-05, + "loss": 5.9055, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 3.74331550802139e-05, + "loss": 6.2998, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 4.27807486631016e-05, + "loss": 6.1521, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.8128342245989304e-05, + "loss": 6.4036, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 5.3475935828877e-05, + "loss": 6.501, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 5.882352941176471e-05, + "loss": 6.3223, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 6.417112299465242e-05, + "loss": 6.3368, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 6.951871657754011e-05, + "loss": 6.2648, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 7.48663101604278e-05, + "loss": 6.1554, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 8.021390374331551e-05, + "loss": 5.8798, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 8.55614973262032e-05, + "loss": 5.9567, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 9.090909090909092e-05, + "loss": 5.9103, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 9.625668449197861e-05, + "loss": 5.4526, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010160427807486631, + "loss": 5.367, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 0.000106951871657754, + "loss": 5.4023, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011229946524064172, + "loss": 5.3629, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011764705882352942, + "loss": 5.0887, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012299465240641713, + "loss": 5.1178, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012834224598930484, + "loss": 4.8329, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013368983957219252, + "loss": 4.849, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013903743315508022, + "loss": 4.8322, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001443850267379679, + "loss": 4.9571, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001497326203208556, + "loss": 4.8369, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015508021390374334, + "loss": 4.8279, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016042780748663101, + "loss": 4.8095, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016577540106951872, + "loss": 4.8473, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001711229946524064, + "loss": 4.6431, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017647058823529413, + "loss": 4.7465, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018181818181818183, + "loss": 4.7161, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001871657754010695, + "loss": 4.705, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019251336898395722, + "loss": 4.6889, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019786096256684495, + "loss": 4.5606, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020320855614973263, + "loss": 4.6834, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020855614973262033, + "loss": 4.5475, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 0.000213903743315508, + "loss": 4.7062, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021925133689839572, + "loss": 4.555, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022459893048128345, + "loss": 4.4943, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022994652406417113, + "loss": 4.6637, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023529411764705883, + "loss": 4.7428, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002406417112299465, + "loss": 4.5246, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024598930481283427, + "loss": 4.5559, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002513368983957219, + "loss": 4.3376, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002566844919786097, + "loss": 4.5093, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026203208556149733, + "loss": 4.3749, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026737967914438503, + "loss": 4.4069, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027272727272727274, + "loss": 4.5086, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027807486631016044, + "loss": 4.3684, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 0.00028342245989304815, + "loss": 4.4441, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002887700534759358, + "loss": 4.439, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029411764705882356, + "loss": 4.4707, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002994652406417112, + "loss": 4.2591, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003048128342245989, + "loss": 4.3509, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.00031016042780748667, + "loss": 4.4297, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003155080213903743, + "loss": 4.4056, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032085561497326203, + "loss": 4.398, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003262032085561498, + "loss": 4.2179, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 0.00033155080213903744, + "loss": 4.2304, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 0.00033689839572192514, + "loss": 4.3654, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003422459893048128, + "loss": 4.4312, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 0.00034759358288770055, + "loss": 4.2155, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035294117647058826, + "loss": 4.2967, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003582887700534759, + "loss": 4.2588, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036363636363636367, + "loss": 4.2611, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003689839572192513, + "loss": 4.2833, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 0.000374331550802139, + "loss": 4.343, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003796791443850268, + "loss": 4.3456, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038502673796791443, + "loss": 4.141, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039037433155080214, + "loss": 4.4074, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003957219251336899, + "loss": 4.247, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040106951871657755, + "loss": 4.3275, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040641711229946525, + "loss": 4.1954, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004117647058823529, + "loss": 4.134, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 0.00041711229946524066, + "loss": 4.3486, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042245989304812837, + "loss": 4.3122, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 0.000427807486631016, + "loss": 4.2094, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004331550802139038, + "loss": 4.3217, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043850267379679143, + "loss": 4.102, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044385026737967914, + "loss": 4.1534, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004491978609625669, + "loss": 4.2219, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045454545454545455, + "loss": 4.2487, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045989304812834225, + "loss": 4.2481, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046524064171123, + "loss": 4.2309, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047058823529411766, + "loss": 4.0941, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047593582887700537, + "loss": 4.0096, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 0.000481283422459893, + "loss": 4.0631, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004866310160427808, + "loss": 4.1869, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004919786096256685, + "loss": 4.1159, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004973262032085562, + "loss": 4.1178, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005026737967914438, + "loss": 4.1284, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005080213903743316, + "loss": 4.0345, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005133689839572194, + "loss": 4.0867, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005187165775401069, + "loss": 4.057, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005240641711229947, + "loss": 3.937, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005294117647058824, + "loss": 3.9765, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005347593582887701, + "loss": 3.9608, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005401069518716578, + "loss": 4.1594, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005454545454545455, + "loss": 4.1302, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005508021390374331, + "loss": 4.1312, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005561497326203209, + "loss": 4.0223, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005614973262032086, + "loss": 4.0161, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005668449197860963, + "loss": 4.0451, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 0.000572192513368984, + "loss": 4.1083, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005775401069518716, + "loss": 4.1682, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005828877005347594, + "loss": 4.0605, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005882352941176471, + "loss": 4.0765, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005935828877005348, + "loss": 3.8291, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005989304812834224, + "loss": 4.0857, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006042780748663102, + "loss": 4.077, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006096256684491978, + "loss": 4.0375, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006149732620320856, + "loss": 3.8772, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006203208556149733, + "loss": 3.9426, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006256684491978609, + "loss": 4.0139, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006310160427807486, + "loss": 4.0414, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006363636363636364, + "loss": 4.0246, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006417112299465241, + "loss": 3.9336, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006470588235294118, + "loss": 4.0677, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006524064171122996, + "loss": 4.0379, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006577540106951871, + "loss": 3.8605, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006631016042780749, + "loss": 3.8841, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006684491978609626, + "loss": 3.837, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006737967914438503, + "loss": 3.8734, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 0.000679144385026738, + "loss": 3.8831, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006844919786096256, + "loss": 3.8443, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006898395721925133, + "loss": 3.8587, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006951871657754011, + "loss": 4.0822, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007005347593582888, + "loss": 3.779, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007058823529411765, + "loss": 3.7747, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007112299465240642, + "loss": 3.8958, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007165775401069518, + "loss": 3.8679, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007219251336898396, + "loss": 3.6683, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007272727272727273, + "loss": 3.8482, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 0.000732620320855615, + "loss": 3.92, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007379679144385026, + "loss": 3.8979, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007433155080213904, + "loss": 3.8828, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 0.000748663101604278, + "loss": 3.9528, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007540106951871658, + "loss": 3.9246, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007593582887700536, + "loss": 3.8685, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007647058823529411, + "loss": 3.9712, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007700534759358289, + "loss": 4.0532, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007754010695187166, + "loss": 3.9034, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007807486631016043, + "loss": 3.8289, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 0.000786096256684492, + "loss": 3.9289, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007914438502673798, + "loss": 4.0015, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007967914438502673, + "loss": 3.8183, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008021390374331551, + "loss": 3.8087, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008074866310160429, + "loss": 3.8361, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008128342245989305, + "loss": 3.839, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008181818181818183, + "loss": 3.8806, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008235294117647058, + "loss": 3.8447, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008288770053475936, + "loss": 3.77, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008342245989304813, + "loss": 3.9146, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 0.000839572192513369, + "loss": 3.845, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008449197860962567, + "loss": 3.8684, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008502673796791444, + "loss": 4.0146, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 0.000855614973262032, + "loss": 3.7876, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008609625668449198, + "loss": 3.8683, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008663101604278076, + "loss": 3.9205, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008716577540106952, + "loss": 4.0075, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008770053475935829, + "loss": 3.7433, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008823529411764706, + "loss": 3.7349, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008877005347593583, + "loss": 3.8098, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 0.000893048128342246, + "loss": 3.885, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008983957219251338, + "loss": 3.8511, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009037433155080213, + "loss": 3.6956, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009090909090909091, + "loss": 3.8744, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009144385026737968, + "loss": 3.7744, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009197860962566845, + "loss": 3.9944, + "step": 172 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009251336898395723, + "loss": 3.8654, + "step": 173 + }, + { + "epoch": 0.03, + "learning_rate": 0.00093048128342246, + "loss": 3.8064, + "step": 174 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009358288770053476, + "loss": 3.8453, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009411764705882353, + "loss": 3.6996, + "step": 176 + }, + { + "epoch": 0.03, + "learning_rate": 0.000946524064171123, + "loss": 3.9487, + "step": 177 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009518716577540107, + "loss": 3.7477, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009572192513368985, + "loss": 3.7293, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 0.000962566844919786, + "loss": 3.8472, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009679144385026738, + "loss": 3.7662, + "step": 181 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009732620320855616, + "loss": 3.8273, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009786096256684492, + "loss": 3.7248, + "step": 183 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983957219251337, + "loss": 3.8215, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009893048128342245, + "loss": 3.8897, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009946524064171124, + "loss": 3.7155, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 0.001, + "loss": 4.0417, + "step": 187 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999999931802496, + "loss": 3.7861, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999997272100022, + "loss": 3.8133, + "step": 189 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999993862225744, + "loss": 3.623, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999998908840306, + "loss": 3.7479, + "step": 191 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999982950633268, + "loss": 3.8731, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999975448918043, + "loss": 3.7765, + "step": 193 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999966583259434, + "loss": 3.8732, + "step": 194 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999995635365986, + "loss": 3.7426, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999944760122107, + "loss": 3.88, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999993180264934, + "loss": 3.7482, + "step": 197 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999917481245096, + "loss": 3.9271, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999901795913278, + "loss": 3.8027, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999884746658165, + "loss": 3.8586, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999986633348441, + "loss": 3.8988, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999846556397038, + "loss": 3.7926, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999825415401438, + "loss": 3.7898, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999802910503383, + "loss": 3.7323, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999779041709005, + "loss": 3.7795, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999753809024823, + "loss": 3.73, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999727212457715, + "loss": 3.7879, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999699252014938, + "loss": 3.6848, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999966992770412, + "loss": 3.7039, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999639239533257, + "loss": 3.8482, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999607187510726, + "loss": 3.7943, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999573771645267, + "loss": 3.7842, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999538991945997, + "loss": 3.9321, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999502848422402, + "loss": 3.8289, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999946534108434, + "loss": 3.7996, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999426469942047, + "loss": 3.7495, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999386235006124, + "loss": 3.7358, + "step": 217 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999344636287548, + "loss": 3.7833, + "step": 218 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999301673797665, + "loss": 3.7257, + "step": 219 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999257347548195, + "loss": 3.7508, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999211657551234, + "loss": 3.8274, + "step": 221 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999164603819238, + "loss": 3.8082, + "step": 222 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999116186365047, + "loss": 3.7433, + "step": 223 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999906640520187, + "loss": 3.7989, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999015260343286, + "loss": 3.5308, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998962751803246, + "loss": 3.7875, + "step": 226 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998908879596076, + "loss": 3.932, + "step": 227 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998853643736468, + "loss": 3.8212, + "step": 228 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999879704423949, + "loss": 3.789, + "step": 229 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998739081120585, + "loss": 3.7505, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998679754395566, + "loss": 3.8381, + "step": 231 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998619064080611, + "loss": 3.8199, + "step": 232 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999855701019228, + "loss": 3.7177, + "step": 233 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998493592747498, + "loss": 3.8603, + "step": 234 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998428811763566, + "loss": 3.7391, + "step": 235 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999836266725816, + "loss": 3.9236, + "step": 236 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998295159249315, + "loss": 3.7047, + "step": 237 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998226287755451, + "loss": 3.7526, + "step": 238 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998156052795355, + "loss": 3.6538, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999808445438819, + "loss": 3.815, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998011492553481, + "loss": 3.7717, + "step": 241 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997937167311134, + "loss": 3.767, + "step": 242 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997861478681425, + "loss": 3.7465, + "step": 243 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997784426685002, + "loss": 3.6345, + "step": 244 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999770601134288, + "loss": 3.7467, + "step": 245 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997626232676454, + "loss": 3.7458, + "step": 246 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997545090707485, + "loss": 3.6346, + "step": 247 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997462585458108, + "loss": 3.7477, + "step": 248 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999737871695083, + "loss": 3.6396, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997293485208528, + "loss": 3.7122, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997206890254454, + "loss": 3.7103, + "step": 251 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997118932112228, + "loss": 3.8014, + "step": 252 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997029610805848, + "loss": 3.7202, + "step": 253 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996938926359676, + "loss": 3.6884, + "step": 254 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996846878798453, + "loss": 3.868, + "step": 255 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996753468147286, + "loss": 3.6932, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999665869443166, + "loss": 3.7192, + "step": 257 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996562557677425, + "loss": 3.7406, + "step": 258 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996465057910805, + "loss": 3.778, + "step": 259 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996366195158402, + "loss": 3.7166, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999626596944718, + "loss": 3.7759, + "step": 261 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996164380804483, + "loss": 3.774, + "step": 262 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996061429258022, + "loss": 3.7561, + "step": 263 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995957114835878, + "loss": 3.837, + "step": 264 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995851437566514, + "loss": 3.6542, + "step": 265 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999574439747875, + "loss": 3.6925, + "step": 266 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995635994601792, + "loss": 3.6578, + "step": 267 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995526228965205, + "loss": 3.7383, + "step": 268 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999541510059894, + "loss": 3.7585, + "step": 269 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995302609533303, + "loss": 3.8008, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995188755798985, + "loss": 3.6539, + "step": 271 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995073539427047, + "loss": 3.8845, + "step": 272 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994956960448912, + "loss": 3.5674, + "step": 273 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994839018896387, + "loss": 3.6741, + "step": 274 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994719714801641, + "loss": 3.7959, + "step": 275 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994599048197223, + "loss": 3.7257, + "step": 276 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994477019116049, + "loss": 3.6391, + "step": 277 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994353627591403, + "loss": 3.6798, + "step": 278 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999422887365695, + "loss": 3.6971, + "step": 279 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009994102757346721, + "loss": 3.6488, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993975278695117, + "loss": 3.7403, + "step": 281 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993846437736913, + "loss": 3.6619, + "step": 282 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999371623450726, + "loss": 3.6132, + "step": 283 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999358466904167, + "loss": 3.7227, + "step": 284 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993451741376034, + "loss": 3.8561, + "step": 285 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993317451546617, + "loss": 3.6644, + "step": 286 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999318179959005, + "loss": 3.7534, + "step": 287 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993044785543337, + "loss": 3.7825, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992906409443854, + "loss": 3.7098, + "step": 289 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999276667132935, + "loss": 3.684, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999262557123794, + "loss": 3.6203, + "step": 291 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992483109208122, + "loss": 3.5992, + "step": 292 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999233928527875, + "loss": 3.6556, + "step": 293 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992194099489064, + "loss": 3.7577, + "step": 294 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992047551878667, + "loss": 3.6756, + "step": 295 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991899642487535, + "loss": 3.8233, + "step": 296 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991750371356016, + "loss": 3.7358, + "step": 297 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991599738524831, + "loss": 3.6257, + "step": 298 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991447744035072, + "loss": 3.6915, + "step": 299 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099912943879282, + "loss": 3.731, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991139670246047, + "loss": 3.7566, + "step": 301 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990983591030825, + "loss": 3.6893, + "step": 302 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990826150325103, + "loss": 3.6207, + "step": 303 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990667348171833, + "loss": 3.6676, + "step": 304 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990507184614336, + "loss": 3.6716, + "step": 305 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099903456596963, + "loss": 3.86, + "step": 306 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990182773461792, + "loss": 3.8431, + "step": 307 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999001852595524, + "loss": 3.6791, + "step": 308 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989852917221451, + "loss": 3.717, + "step": 309 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989685947305602, + "loss": 3.6599, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989517616253243, + "loss": 3.6526, + "step": 311 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998934792411029, + "loss": 3.6757, + "step": 312 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989176870923034, + "loss": 3.6942, + "step": 313 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989004456738137, + "loss": 3.7508, + "step": 314 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988830681602631, + "loss": 3.601, + "step": 315 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998865554556392, + "loss": 3.5761, + "step": 316 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988479048669782, + "loss": 3.6358, + "step": 317 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988301190968362, + "loss": 3.7138, + "step": 318 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988121972508176, + "loss": 3.6576, + "step": 319 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987941393338115, + "loss": 3.572, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987759453507438, + "loss": 3.7492, + "step": 321 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998757615306578, + "loss": 3.6342, + "step": 322 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987391492063138, + "loss": 3.5609, + "step": 323 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998720547054989, + "loss": 3.5772, + "step": 324 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987018088576777, + "loss": 3.751, + "step": 325 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986829346194917, + "loss": 3.6777, + "step": 326 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099866392434558, + "loss": 3.7342, + "step": 327 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986447780411278, + "loss": 3.6687, + "step": 328 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986254957113586, + "loss": 3.5875, + "step": 329 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986060773615319, + "loss": 3.6872, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985865229969453, + "loss": 3.5409, + "step": 331 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985668326229328, + "loss": 3.597, + "step": 332 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985470062448658, + "loss": 3.6946, + "step": 333 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985270438681525, + "loss": 3.7482, + "step": 334 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985069454982389, + "loss": 3.7012, + "step": 335 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984867111406076, + "loss": 3.5765, + "step": 336 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984663408007777, + "loss": 3.5477, + "step": 337 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984458344843067, + "loss": 3.8007, + "step": 338 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998425192196788, + "loss": 3.6531, + "step": 339 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998404413943853, + "loss": 3.6274, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983834997311697, + "loss": 3.7646, + "step": 341 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983624495644432, + "loss": 3.7375, + "step": 342 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983412634494157, + "loss": 3.6213, + "step": 343 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998319941391867, + "loss": 3.646, + "step": 344 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998298483397613, + "loss": 3.7726, + "step": 345 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982768894725076, + "loss": 3.6982, + "step": 346 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982551596224411, + "loss": 3.5818, + "step": 347 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982332938533417, + "loss": 3.7393, + "step": 348 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982112921711735, + "loss": 3.6548, + "step": 349 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981891545819388, + "loss": 3.6664, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981668810916764, + "loss": 3.5646, + "step": 351 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981444717064622, + "loss": 3.5343, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981219264324093, + "loss": 3.8004, + "step": 353 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998099245275668, + "loss": 3.6435, + "step": 354 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980764282424251, + "loss": 3.5236, + "step": 355 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998053475338905, + "loss": 3.7884, + "step": 356 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980303865713693, + "loss": 3.6316, + "step": 357 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980071619461162, + "loss": 3.6923, + "step": 358 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979838014694813, + "loss": 3.6906, + "step": 359 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979603051478367, + "loss": 3.7143, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979366729875924, + "loss": 3.6242, + "step": 361 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979129049951948, + "loss": 3.6223, + "step": 362 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978890011771275, + "loss": 3.7303, + "step": 363 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978649615399113, + "loss": 3.7574, + "step": 364 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978407860901043, + "loss": 3.5757, + "step": 365 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978164748343007, + "loss": 3.5999, + "step": 366 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997792027779133, + "loss": 3.6576, + "step": 367 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977674449312692, + "loss": 3.6334, + "step": 368 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977427262974163, + "loss": 3.5762, + "step": 369 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977178718843169, + "loss": 3.6162, + "step": 370 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997692881698751, + "loss": 3.6593, + "step": 371 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976677557475353, + "loss": 3.623, + "step": 372 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976424940375246, + "loss": 3.7449, + "step": 373 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976170965756098, + "loss": 3.5766, + "step": 374 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975915633687188, + "loss": 3.4851, + "step": 375 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975658944238169, + "loss": 3.5713, + "step": 376 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975400897479065, + "loss": 3.6258, + "step": 377 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997514149348027, + "loss": 3.6143, + "step": 378 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974880732312541, + "loss": 3.8064, + "step": 379 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974618614047017, + "loss": 3.7325, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974355138755197, + "loss": 3.5524, + "step": 381 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974090306508958, + "loss": 3.674, + "step": 382 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973824117380542, + "loss": 3.5691, + "step": 383 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973556571442564, + "loss": 3.5528, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973287668768004, + "loss": 3.7901, + "step": 385 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997301740943022, + "loss": 3.4705, + "step": 386 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972745793502933, + "loss": 3.6123, + "step": 387 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972472821060238, + "loss": 3.6343, + "step": 388 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972198492176603, + "loss": 3.5437, + "step": 389 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971922806926857, + "loss": 3.6402, + "step": 390 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971645765386207, + "loss": 3.7182, + "step": 391 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971367367630226, + "loss": 3.5644, + "step": 392 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971087613734858, + "loss": 3.653, + "step": 393 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997080650377642, + "loss": 3.6866, + "step": 394 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997052403783159, + "loss": 3.6885, + "step": 395 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970240215977427, + "loss": 3.5455, + "step": 396 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969955038291352, + "loss": 3.6225, + "step": 397 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996966850485116, + "loss": 3.6166, + "step": 398 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969380615735017, + "loss": 3.6286, + "step": 399 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969091371021452, + "loss": 3.5394, + "step": 400 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996880077078937, + "loss": 3.6536, + "step": 401 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968508815118042, + "loss": 3.5967, + "step": 402 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968215504087114, + "loss": 3.5914, + "step": 403 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967920837776595, + "loss": 3.6483, + "step": 404 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967624816266869, + "loss": 3.7576, + "step": 405 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967327439638688, + "loss": 3.6956, + "step": 406 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996702870797317, + "loss": 3.6324, + "step": 407 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996672862135181, + "loss": 3.5627, + "step": 408 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966427179856467, + "loss": 3.8247, + "step": 409 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966124383569371, + "loss": 3.5794, + "step": 410 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965820232573125, + "loss": 3.7232, + "step": 411 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996551472695069, + "loss": 3.6576, + "step": 412 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965207866785414, + "loss": 3.5436, + "step": 413 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964899652161003, + "loss": 3.51, + "step": 414 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964590083161532, + "loss": 3.6684, + "step": 415 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964279159871449, + "loss": 3.5939, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963966882375575, + "loss": 3.7163, + "step": 417 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996365325075909, + "loss": 3.6201, + "step": 418 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963338265107553, + "loss": 3.6707, + "step": 419 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996302192550689, + "loss": 3.5358, + "step": 420 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962704232043392, + "loss": 3.6123, + "step": 421 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962385184803726, + "loss": 3.6365, + "step": 422 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962064783874923, + "loss": 3.6887, + "step": 423 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961743029344382, + "loss": 3.4337, + "step": 424 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961419921299883, + "loss": 3.4868, + "step": 425 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996109545982956, + "loss": 3.6961, + "step": 426 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960769645021923, + "loss": 3.785, + "step": 427 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960442476965852, + "loss": 3.4476, + "step": 428 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099601139557506, + "loss": 3.6732, + "step": 429 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959784081465775, + "loss": 3.5934, + "step": 430 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995945285420137, + "loss": 3.5392, + "step": 431 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959120274047738, + "loss": 3.4776, + "step": 432 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958786341095604, + "loss": 3.7242, + "step": 433 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958451055436064, + "loss": 3.5389, + "step": 434 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958114417160577, + "loss": 3.5481, + "step": 435 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957776426360975, + "loss": 3.5544, + "step": 436 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957437083129464, + "loss": 3.6186, + "step": 437 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957096387558605, + "loss": 3.4458, + "step": 438 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995675433974134, + "loss": 3.5658, + "step": 439 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956410939770978, + "loss": 3.5481, + "step": 440 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956066187741194, + "loss": 3.4711, + "step": 441 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955720083746034, + "loss": 3.5488, + "step": 442 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995537262787991, + "loss": 3.5963, + "step": 443 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955023820237602, + "loss": 3.6598, + "step": 444 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954673660914266, + "loss": 3.6826, + "step": 445 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995432215000542, + "loss": 3.5356, + "step": 446 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995396928760695, + "loss": 3.6369, + "step": 447 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995361507381512, + "loss": 3.7795, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995325950872655, + "loss": 3.5622, + "step": 449 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952902592438237, + "loss": 3.6422, + "step": 450 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952544325047543, + "loss": 3.6076, + "step": 451 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099521847066522, + "loss": 3.7241, + "step": 452 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951823737350313, + "loss": 3.5244, + "step": 453 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951461417240341, + "loss": 3.5843, + "step": 454 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951097746421131, + "loss": 3.6061, + "step": 455 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950732724991885, + "loss": 3.6687, + "step": 456 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950366353052174, + "loss": 3.6813, + "step": 457 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949998630701946, + "loss": 3.4376, + "step": 458 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994962955804151, + "loss": 3.6877, + "step": 459 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949259135171543, + "loss": 3.5971, + "step": 460 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948887362193095, + "loss": 3.5688, + "step": 461 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994851423920758, + "loss": 3.5673, + "step": 462 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948139766316785, + "loss": 3.5814, + "step": 463 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947763943622865, + "loss": 3.6455, + "step": 464 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947386771228333, + "loss": 3.4994, + "step": 465 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009947008249236082, + "loss": 3.5484, + "step": 466 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994662837774937, + "loss": 3.4904, + "step": 467 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994624715687182, + "loss": 3.6338, + "step": 468 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945864586707427, + "loss": 3.5473, + "step": 469 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945480667360549, + "loss": 3.6766, + "step": 470 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994509539893592, + "loss": 3.5347, + "step": 471 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944708781538635, + "loss": 3.5936, + "step": 472 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994432081527416, + "loss": 3.6759, + "step": 473 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994393150024833, + "loss": 3.4666, + "step": 474 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994354083656734, + "loss": 3.6506, + "step": 475 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943148824337765, + "loss": 3.5375, + "step": 476 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942755463666542, + "loss": 3.4339, + "step": 477 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994236075466097, + "loss": 3.5631, + "step": 478 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994196469742873, + "loss": 3.5491, + "step": 479 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941567292077857, + "loss": 3.5338, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994116853871676, + "loss": 3.6489, + "step": 481 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940768437454218, + "loss": 3.6265, + "step": 482 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940366988399369, + "loss": 3.487, + "step": 483 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993996419166173, + "loss": 3.5466, + "step": 484 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939560047351175, + "loss": 3.6031, + "step": 485 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939154555577954, + "loss": 3.5594, + "step": 486 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938747716452681, + "loss": 3.6552, + "step": 487 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938339530086336, + "loss": 3.571, + "step": 488 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937929996590266, + "loss": 3.5924, + "step": 489 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937519116076194, + "loss": 3.7261, + "step": 490 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099371068886562, + "loss": 3.5448, + "step": 491 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936693314442737, + "loss": 3.5326, + "step": 492 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993627839354862, + "loss": 3.4735, + "step": 493 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935862126087042, + "loss": 3.5275, + "step": 494 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993544451217155, + "loss": 3.7592, + "step": 495 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935025551916067, + "loss": 3.5127, + "step": 496 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934605245434886, + "loss": 3.6119, + "step": 497 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934183592842656, + "loss": 3.3895, + "step": 498 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933760594254403, + "loss": 3.493, + "step": 499 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933336249785514, + "loss": 3.6469, + "step": 500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932910559551748, + "loss": 3.5954, + "step": 501 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993248352366923, + "loss": 3.5558, + "step": 502 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993205514225445, + "loss": 3.4001, + "step": 503 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931625415424265, + "loss": 3.6699, + "step": 504 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099311943432959, + "loss": 3.4861, + "step": 505 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930761925986953, + "loss": 3.5784, + "step": 506 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930328163615374, + "loss": 3.6346, + "step": 507 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929893056299496, + "loss": 3.5611, + "step": 508 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992945660415801, + "loss": 3.5395, + "step": 509 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929018807309975, + "loss": 3.6389, + "step": 510 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928579665874817, + "loss": 3.4937, + "step": 511 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992813917997233, + "loss": 3.4859, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927697349722676, + "loss": 3.4833, + "step": 513 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992725417524638, + "loss": 3.5376, + "step": 514 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926809656664334, + "loss": 3.5175, + "step": 515 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926363794097804, + "loss": 3.6341, + "step": 516 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992591658766841, + "loss": 3.5758, + "step": 517 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992546803749815, + "loss": 3.5091, + "step": 518 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992501814370938, + "loss": 3.4318, + "step": 519 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924566906424832, + "loss": 3.4936, + "step": 520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924114325767595, + "loss": 3.5361, + "step": 521 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992366040186113, + "loss": 3.6106, + "step": 522 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992320513482926, + "loss": 3.4672, + "step": 523 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922748524796184, + "loss": 3.4872, + "step": 524 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922290571886452, + "loss": 3.6643, + "step": 525 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921831276224996, + "loss": 3.5878, + "step": 526 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921370637937105, + "loss": 3.5023, + "step": 527 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920908657148435, + "loss": 3.5572, + "step": 528 + }, + { + "epoch": 0.09, + "learning_rate": 0.000992044533398501, + "loss": 3.4962, + "step": 529 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919980668573222, + "loss": 3.5311, + "step": 530 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919514661039827, + "loss": 3.5491, + "step": 531 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919047311511945, + "loss": 3.5165, + "step": 532 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918578620117067, + "loss": 3.4012, + "step": 533 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918108586983044, + "loss": 3.3695, + "step": 534 + }, + { + "epoch": 0.09, + "learning_rate": 0.00099176372122381, + "loss": 3.5244, + "step": 535 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917164496010818, + "loss": 3.6515, + "step": 536 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991669043843015, + "loss": 3.5018, + "step": 537 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991621503962542, + "loss": 3.4892, + "step": 538 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915738299726305, + "loss": 3.6485, + "step": 539 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991526021886286, + "loss": 3.5323, + "step": 540 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914780797165496, + "loss": 3.4927, + "step": 541 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914300034764999, + "loss": 3.4527, + "step": 542 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913817931792512, + "loss": 3.6255, + "step": 543 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913334488379551, + "loss": 3.605, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912849704657996, + "loss": 3.6616, + "step": 545 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912363580760085, + "loss": 3.428, + "step": 546 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911876116818433, + "loss": 3.599, + "step": 547 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911387312966013, + "loss": 3.444, + "step": 548 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910897169336168, + "loss": 3.5013, + "step": 549 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910405686062603, + "loss": 3.5512, + "step": 550 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909912863279389, + "loss": 3.5392, + "step": 551 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909418701120965, + "loss": 3.4871, + "step": 552 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908923199722132, + "loss": 3.4046, + "step": 553 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908426359218055, + "loss": 3.5671, + "step": 554 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907928179744272, + "loss": 3.6767, + "step": 555 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907428661436678, + "loss": 3.5237, + "step": 556 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906927804431542, + "loss": 3.5909, + "step": 557 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906425608865487, + "loss": 3.585, + "step": 558 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905922074875508, + "loss": 3.4669, + "step": 559 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905417202598968, + "loss": 3.6374, + "step": 560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904910992173585, + "loss": 3.5054, + "step": 561 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904403443737452, + "loss": 3.5301, + "step": 562 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903894557429023, + "loss": 3.4528, + "step": 563 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903384333387116, + "loss": 3.4843, + "step": 564 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902872771750915, + "loss": 3.4409, + "step": 565 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902359872659972, + "loss": 3.4456, + "step": 566 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901845636254196, + "loss": 3.7117, + "step": 567 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901330062673868, + "loss": 3.5793, + "step": 568 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990081315205963, + "loss": 3.5781, + "step": 569 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900294904552492, + "loss": 3.5186, + "step": 570 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899775320293827, + "loss": 3.5342, + "step": 571 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989925439942537, + "loss": 3.5156, + "step": 572 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898732142089224, + "loss": 3.5947, + "step": 573 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898208548427855, + "loss": 3.625, + "step": 574 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897683618584098, + "loss": 3.5469, + "step": 575 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897157352701143, + "loss": 3.502, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896629750922555, + "loss": 3.5049, + "step": 577 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896100813392254, + "loss": 3.5742, + "step": 578 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895570540254532, + "loss": 3.5773, + "step": 579 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895038931654041, + "loss": 3.5103, + "step": 580 + }, + { + "epoch": 0.09, + "learning_rate": 0.00098945059877358, + "loss": 3.6207, + "step": 581 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893971708645189, + "loss": 3.5775, + "step": 582 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893436094527953, + "loss": 3.5351, + "step": 583 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892899145530205, + "loss": 3.5495, + "step": 584 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892360861798418, + "loss": 3.5029, + "step": 585 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989182124347943, + "loss": 3.5773, + "step": 586 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891280290720446, + "loss": 3.6013, + "step": 587 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890738003669028, + "loss": 3.6577, + "step": 588 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989019438247311, + "loss": 3.5552, + "step": 589 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889649427280987, + "loss": 3.4023, + "step": 590 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889103138241314, + "loss": 3.7719, + "step": 591 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888555515503116, + "loss": 3.6188, + "step": 592 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888006559215777, + "loss": 3.5023, + "step": 593 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887456269529048, + "loss": 3.4684, + "step": 594 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988690464659304, + "loss": 3.5242, + "step": 595 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886351690558237, + "loss": 3.588, + "step": 596 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885797401575473, + "loss": 3.5065, + "step": 597 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885241779795954, + "loss": 3.6716, + "step": 598 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988468482537125, + "loss": 3.5966, + "step": 599 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884126538453294, + "loss": 3.4471, + "step": 600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883566919194375, + "loss": 3.5159, + "step": 601 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883005967747157, + "loss": 3.651, + "step": 602 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988244368426466, + "loss": 3.5517, + "step": 603 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988188006890027, + "loss": 3.4959, + "step": 604 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009881315121807734, + "loss": 3.5669, + "step": 605 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880748843141168, + "loss": 3.6149, + "step": 606 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988018123305504, + "loss": 3.5215, + "step": 607 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879612291704197, + "loss": 3.5427, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879042019243834, + "loss": 3.4381, + "step": 609 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878470415829517, + "loss": 3.6386, + "step": 610 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877897481617175, + "loss": 3.4823, + "step": 611 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877323216763098, + "loss": 3.5773, + "step": 612 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876747621423941, + "loss": 3.4238, + "step": 613 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987617069575672, + "loss": 3.5172, + "step": 614 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875592439918812, + "loss": 3.5516, + "step": 615 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875012854067965, + "loss": 3.4397, + "step": 616 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874431938362276, + "loss": 3.4421, + "step": 617 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009873849692960221, + "loss": 3.7115, + "step": 618 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009873266118020627, + "loss": 3.5163, + "step": 619 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872681213702688, + "loss": 3.4918, + "step": 620 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987209498016596, + "loss": 3.4362, + "step": 621 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871507417570362, + "loss": 3.45, + "step": 622 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870918526076176, + "loss": 3.5738, + "step": 623 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870328305844043, + "loss": 3.4739, + "step": 624 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869736757034972, + "loss": 3.4735, + "step": 625 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986914387981033, + "loss": 3.4475, + "step": 626 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868549674331847, + "loss": 3.4443, + "step": 627 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986795414076162, + "loss": 3.4398, + "step": 628 + }, + { + "epoch": 0.1, + "learning_rate": 0.00098673572792621, + "loss": 3.5642, + "step": 629 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009866759089996108, + "loss": 3.4707, + "step": 630 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009866159573126824, + "loss": 3.4429, + "step": 631 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865558728817789, + "loss": 3.5282, + "step": 632 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864956557232908, + "loss": 3.5258, + "step": 633 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864353058536446, + "loss": 3.4504, + "step": 634 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863748232893034, + "loss": 3.532, + "step": 635 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863142080467663, + "loss": 3.5252, + "step": 636 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986253460142568, + "loss": 3.3678, + "step": 637 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861925795932804, + "loss": 3.5199, + "step": 638 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986131566415511, + "loss": 3.558, + "step": 639 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860704206259034, + "loss": 3.4935, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860091422411379, + "loss": 3.5435, + "step": 641 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859477312779302, + "loss": 3.5333, + "step": 642 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985886187753033, + "loss": 3.5841, + "step": 643 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858245116832346, + "loss": 3.5702, + "step": 644 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857627030853595, + "loss": 3.5011, + "step": 645 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857007619762687, + "loss": 3.5814, + "step": 646 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985638688372859, + "loss": 3.4576, + "step": 647 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855764822920634, + "loss": 3.5555, + "step": 648 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855141437508513, + "loss": 3.4445, + "step": 649 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854516727662277, + "loss": 3.591, + "step": 650 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009853890693552344, + "loss": 3.5724, + "step": 651 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009853263335349487, + "loss": 3.4615, + "step": 652 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852634653224846, + "loss": 3.4789, + "step": 653 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852004647349917, + "loss": 3.4826, + "step": 654 + }, + { + "epoch": 0.11, + "learning_rate": 0.000985137331789656, + "loss": 3.5983, + "step": 655 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850740665036993, + "loss": 3.5091, + "step": 656 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850106688943803, + "loss": 3.5704, + "step": 657 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009849471389789927, + "loss": 3.458, + "step": 658 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848834767748672, + "loss": 3.5689, + "step": 659 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848196822993698, + "loss": 3.4767, + "step": 660 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009847557555699034, + "loss": 3.5393, + "step": 661 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846916966039063, + "loss": 3.5799, + "step": 662 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846275054188533, + "loss": 3.5126, + "step": 663 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984563182032255, + "loss": 3.4894, + "step": 664 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844987264616585, + "loss": 3.5242, + "step": 665 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844341387246463, + "loss": 3.5015, + "step": 666 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843694188388374, + "loss": 3.4522, + "step": 667 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843045668218866, + "loss": 3.4047, + "step": 668 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984239582691485, + "loss": 3.3793, + "step": 669 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841744664653596, + "loss": 3.6124, + "step": 670 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841092181612737, + "loss": 3.4762, + "step": 671 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984043837797026, + "loss": 3.5768, + "step": 672 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839783253904517, + "loss": 3.4672, + "step": 673 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983912680959422, + "loss": 3.5286, + "step": 674 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838469045218442, + "loss": 3.5863, + "step": 675 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983780996095661, + "loss": 3.5107, + "step": 676 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837149556988524, + "loss": 3.6364, + "step": 677 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836487833494328, + "loss": 3.5758, + "step": 678 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835824790654535, + "loss": 3.4237, + "step": 679 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835160428650017, + "loss": 3.3763, + "step": 680 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834494747662007, + "loss": 3.6611, + "step": 681 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009833827747872093, + "loss": 3.3836, + "step": 682 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009833159429462229, + "loss": 3.3775, + "step": 683 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832489792614722, + "loss": 3.5003, + "step": 684 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831818837512248, + "loss": 3.5504, + "step": 685 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983114656433783, + "loss": 3.4241, + "step": 686 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830472973274861, + "loss": 3.5479, + "step": 687 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982979806450709, + "loss": 3.4593, + "step": 688 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982912183821863, + "loss": 3.5261, + "step": 689 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009828444294593937, + "loss": 3.4643, + "step": 690 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827765433817848, + "loss": 3.6277, + "step": 691 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827085256075545, + "loss": 3.6039, + "step": 692 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826403761552579, + "loss": 3.4462, + "step": 693 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825720950434847, + "loss": 3.6047, + "step": 694 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825036822908616, + "loss": 3.3855, + "step": 695 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824351379160514, + "loss": 3.5885, + "step": 696 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823664619377516, + "loss": 3.5475, + "step": 697 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982297654374697, + "loss": 3.5979, + "step": 698 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822287152456571, + "loss": 3.5535, + "step": 699 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982159644569438, + "loss": 3.5769, + "step": 700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820904423648816, + "loss": 3.5396, + "step": 701 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820211086508652, + "loss": 3.427, + "step": 702 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819516434463028, + "loss": 3.4448, + "step": 703 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818820467701434, + "loss": 3.4626, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818123186413725, + "loss": 3.3334, + "step": 705 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817424590790114, + "loss": 3.4315, + "step": 706 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816724681021168, + "loss": 3.5492, + "step": 707 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816023457297819, + "loss": 3.5175, + "step": 708 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815320919811348, + "loss": 3.4223, + "step": 709 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009814617068753405, + "loss": 3.5122, + "step": 710 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813911904315992, + "loss": 3.5805, + "step": 711 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981320542669147, + "loss": 3.5595, + "step": 712 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812497636072562, + "loss": 3.4128, + "step": 713 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009811788532652342, + "loss": 3.4413, + "step": 714 + }, + { + "epoch": 0.12, + "learning_rate": 0.000981107811662425, + "loss": 3.54, + "step": 715 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810366388182078, + "loss": 3.6898, + "step": 716 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980965334751998, + "loss": 3.5101, + "step": 717 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808938994832464, + "loss": 3.5096, + "step": 718 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808223330314402, + "loss": 3.4713, + "step": 719 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807506354161017, + "loss": 3.3966, + "step": 720 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806788066567896, + "loss": 3.4621, + "step": 721 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806068467730976, + "loss": 3.5022, + "step": 722 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980534755784656, + "loss": 3.392, + "step": 723 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009804625337111305, + "loss": 3.3015, + "step": 724 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803901805722224, + "loss": 3.5334, + "step": 725 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803176963876692, + "loss": 3.3377, + "step": 726 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802450811772433, + "loss": 3.5432, + "step": 727 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009801723349607539, + "loss": 3.3393, + "step": 728 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800994577580453, + "loss": 3.4604, + "step": 729 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800264495889977, + "loss": 3.4806, + "step": 730 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979953310473527, + "loss": 3.5224, + "step": 731 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798800404315848, + "loss": 3.347, + "step": 732 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798066394831584, + "loss": 3.4114, + "step": 733 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979733107648271, + "loss": 3.4063, + "step": 734 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796594449469809, + "loss": 3.4683, + "step": 735 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979585651399383, + "loss": 3.4805, + "step": 736 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009795117270256074, + "loss": 3.3007, + "step": 737 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794376718458197, + "loss": 3.3117, + "step": 738 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979363485880222, + "loss": 3.3804, + "step": 739 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009792891691490506, + "loss": 3.4172, + "step": 740 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009792147216725789, + "loss": 3.5357, + "step": 741 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791401434711153, + "loss": 3.4155, + "step": 742 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979065434565004, + "loss": 3.483, + "step": 743 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789905949746252, + "loss": 3.4455, + "step": 744 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789156247203938, + "loss": 3.4546, + "step": 745 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788405238227611, + "loss": 3.5603, + "step": 746 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978765292302214, + "loss": 3.435, + "step": 747 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786899301792752, + "loss": 3.4021, + "step": 748 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786144374745022, + "loss": 3.3085, + "step": 749 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978538814208489, + "loss": 3.7027, + "step": 750 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784630604018647, + "loss": 3.4932, + "step": 751 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978387176075294, + "loss": 3.5908, + "step": 752 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978311161249478, + "loss": 3.3883, + "step": 753 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009782350159451521, + "loss": 3.5029, + "step": 754 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009781587401830885, + "loss": 3.4665, + "step": 755 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978082333984094, + "loss": 3.483, + "step": 756 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978005797369012, + "loss": 3.476, + "step": 757 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009779291303587203, + "loss": 3.431, + "step": 758 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778523329741335, + "loss": 3.4099, + "step": 759 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777754052362007, + "loss": 3.534, + "step": 760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009776983471659073, + "loss": 3.4808, + "step": 761 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009776211587842738, + "loss": 3.5587, + "step": 762 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009775438401123566, + "loss": 3.4687, + "step": 763 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977466391171247, + "loss": 3.4258, + "step": 764 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977388811982073, + "loss": 3.4678, + "step": 765 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009773111025659972, + "loss": 3.4692, + "step": 766 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772332629442175, + "loss": 3.4615, + "step": 767 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977155293137968, + "loss": 3.4073, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009770771931685185, + "loss": 3.613, + "step": 769 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009769989630571737, + "loss": 3.6737, + "step": 770 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009769206028252735, + "loss": 3.5279, + "step": 771 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009768421124941944, + "loss": 3.5014, + "step": 772 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009767634920853474, + "loss": 3.4013, + "step": 773 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766847416201796, + "loss": 3.4528, + "step": 774 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766058611201732, + "loss": 3.395, + "step": 775 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009765268506068459, + "loss": 3.4454, + "step": 776 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764477101017514, + "loss": 3.4507, + "step": 777 + }, + { + "epoch": 0.13, + "learning_rate": 0.000976368439626478, + "loss": 3.4283, + "step": 778 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762890392026502, + "loss": 3.4319, + "step": 779 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762095088519274, + "loss": 3.3616, + "step": 780 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761298485960048, + "loss": 3.3573, + "step": 781 + }, + { + "epoch": 0.13, + "learning_rate": 0.000976050058456613, + "loss": 3.3781, + "step": 782 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009759701384555177, + "loss": 3.537, + "step": 783 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758900886145205, + "loss": 3.5197, + "step": 784 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758099089554584, + "loss": 3.5167, + "step": 785 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757295995002031, + "loss": 3.5496, + "step": 786 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009756491602706625, + "loss": 3.4181, + "step": 787 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755685912887798, + "loss": 3.3405, + "step": 788 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754878925765329, + "loss": 3.3718, + "step": 789 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754070641559362, + "loss": 3.5102, + "step": 790 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753261060490384, + "loss": 3.4478, + "step": 791 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752450182779243, + "loss": 3.4663, + "step": 792 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009751638008647139, + "loss": 3.4649, + "step": 793 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750824538315623, + "loss": 3.4826, + "step": 794 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750009772006603, + "loss": 3.6714, + "step": 795 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009749193709942339, + "loss": 3.4362, + "step": 796 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748376352345443, + "loss": 3.5046, + "step": 797 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009747557699438884, + "loss": 3.4076, + "step": 798 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974673775144598, + "loss": 3.608, + "step": 799 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009745916508590408, + "loss": 3.4245, + "step": 800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009745093971096193, + "loss": 3.362, + "step": 801 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744270139187716, + "loss": 3.5268, + "step": 802 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009743445013089707, + "loss": 3.5477, + "step": 803 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009742618593027255, + "loss": 3.5585, + "step": 804 + }, + { + "epoch": 0.13, + "learning_rate": 0.00097417908792258, + "loss": 3.5237, + "step": 805 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974096187191113, + "loss": 3.4769, + "step": 806 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009740131571309394, + "loss": 3.5084, + "step": 807 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973929997764709, + "loss": 3.361, + "step": 808 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009738467091151063, + "loss": 3.4713, + "step": 809 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737632912048522, + "loss": 3.3813, + "step": 810 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973679744056702, + "loss": 3.5314, + "step": 811 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735960676934465, + "loss": 3.5343, + "step": 812 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973512262137912, + "loss": 3.5633, + "step": 813 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734283274129595, + "loss": 3.4784, + "step": 814 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009733442635414858, + "loss": 3.5808, + "step": 815 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732600705464228, + "loss": 3.4311, + "step": 816 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009731757484507371, + "loss": 3.4009, + "step": 817 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730912972774311, + "loss": 3.4182, + "step": 818 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730067170495425, + "loss": 3.3137, + "step": 819 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009729220077901435, + "loss": 3.4959, + "step": 820 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728371695223424, + "loss": 3.4695, + "step": 821 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727522022692817, + "loss": 3.5425, + "step": 822 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009726671060541401, + "loss": 3.5181, + "step": 823 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009725818809001308, + "loss": 3.4539, + "step": 824 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724965268305024, + "loss": 3.4355, + "step": 825 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724110438685386, + "loss": 3.4231, + "step": 826 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009723254320375583, + "loss": 3.3532, + "step": 827 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722396913609157, + "loss": 3.5464, + "step": 828 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009721538218619997, + "loss": 3.4366, + "step": 829 + }, + { + "epoch": 0.13, + "learning_rate": 0.000972067823564235, + "loss": 3.3732, + "step": 830 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009719816964910809, + "loss": 3.3369, + "step": 831 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009718954406660317, + "loss": 3.4348, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009718090561126176, + "loss": 3.4791, + "step": 833 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009717225428544034, + "loss": 3.3915, + "step": 834 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716359009149889, + "loss": 3.5801, + "step": 835 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971549130318009, + "loss": 3.2591, + "step": 836 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009714622310871342, + "loss": 3.4877, + "step": 837 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009713752032460694, + "loss": 3.5312, + "step": 838 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712880468185552, + "loss": 3.3617, + "step": 839 + }, + { + "epoch": 0.14, + "learning_rate": 0.000971200761828367, + "loss": 3.5226, + "step": 840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009711133482993151, + "loss": 3.3586, + "step": 841 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710258062552451, + "loss": 3.579, + "step": 842 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009709381357200376, + "loss": 3.489, + "step": 843 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009708503367176083, + "loss": 3.4277, + "step": 844 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707624092719078, + "loss": 3.3935, + "step": 845 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970674353406922, + "loss": 3.5132, + "step": 846 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705861691466716, + "loss": 3.3905, + "step": 847 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970497856515212, + "loss": 3.4973, + "step": 848 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009704094155366345, + "loss": 3.4043, + "step": 849 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009703208462350649, + "loss": 3.4351, + "step": 850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702321486346637, + "loss": 3.5083, + "step": 851 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970143322759627, + "loss": 3.4914, + "step": 852 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009700543686341854, + "loss": 3.5055, + "step": 853 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699652862826049, + "loss": 3.3615, + "step": 854 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969876075729186, + "loss": 3.4662, + "step": 855 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009697867369982647, + "loss": 3.4596, + "step": 856 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696972701142118, + "loss": 3.4821, + "step": 857 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696076751014327, + "loss": 3.4982, + "step": 858 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969517951984368, + "loss": 3.4358, + "step": 859 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009694281007874935, + "loss": 3.4106, + "step": 860 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009693381215353196, + "loss": 3.4424, + "step": 861 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692480142523918, + "loss": 3.5152, + "step": 862 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009691577789632904, + "loss": 3.4181, + "step": 863 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009690674156926308, + "loss": 3.4328, + "step": 864 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689769244650631, + "loss": 3.4373, + "step": 865 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688863053052722, + "loss": 3.4083, + "step": 866 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687955582379785, + "loss": 3.5364, + "step": 867 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687046832879367, + "loss": 3.4056, + "step": 868 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009686136804799365, + "loss": 3.5085, + "step": 869 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009685225498388026, + "loss": 3.4764, + "step": 870 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684312913893947, + "loss": 3.3951, + "step": 871 + }, + { + "epoch": 0.14, + "learning_rate": 0.000968339905156607, + "loss": 3.5044, + "step": 872 + }, + { + "epoch": 0.14, + "learning_rate": 0.000968248391165369, + "loss": 3.3641, + "step": 873 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009681567494406445, + "loss": 3.4339, + "step": 874 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009680649800074323, + "loss": 3.5734, + "step": 875 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009679730828907669, + "loss": 3.5639, + "step": 876 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009678810581157162, + "loss": 3.4469, + "step": 877 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967788905707384, + "loss": 3.559, + "step": 878 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676966256909085, + "loss": 3.5335, + "step": 879 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676042180914626, + "loss": 3.5049, + "step": 880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009675116829342543, + "loss": 3.4148, + "step": 881 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674190202445264, + "loss": 3.2746, + "step": 882 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009673262300475562, + "loss": 3.4149, + "step": 883 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967233312368656, + "loss": 3.4076, + "step": 884 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009671402672331727, + "loss": 3.3937, + "step": 885 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009670470946664883, + "loss": 3.4546, + "step": 886 + }, + { + "epoch": 0.14, + "learning_rate": 0.000966953794694019, + "loss": 3.5755, + "step": 887 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009668603673412164, + "loss": 3.3033, + "step": 888 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667668126335665, + "loss": 3.4493, + "step": 889 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009666731305965901, + "loss": 3.4355, + "step": 890 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009665793212558427, + "loss": 3.4193, + "step": 891 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009664853846369143, + "loss": 3.4251, + "step": 892 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663913207654303, + "loss": 3.4735, + "step": 893 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662971296670502, + "loss": 3.5888, + "step": 894 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662028113674683, + "loss": 3.2766, + "step": 895 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661083658924139, + "loss": 3.4096, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009660137932676505, + "loss": 3.4307, + "step": 897 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009659190935189768, + "loss": 3.3639, + "step": 898 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658242666722258, + "loss": 3.4123, + "step": 899 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009657293127532654, + "loss": 3.3955, + "step": 900 + }, + { + "epoch": 0.15, + "learning_rate": 0.000965634231787998, + "loss": 3.3262, + "step": 901 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655390238023609, + "loss": 3.3778, + "step": 902 + }, + { + "epoch": 0.15, + "learning_rate": 0.000965443688822326, + "loss": 3.5448, + "step": 903 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009653482268738991, + "loss": 3.5188, + "step": 904 + }, + { + "epoch": 0.15, + "learning_rate": 0.000965252637983122, + "loss": 3.3605, + "step": 905 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009651569221760699, + "loss": 3.4631, + "step": 906 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650610794788533, + "loss": 3.3609, + "step": 907 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009649651099176172, + "loss": 3.2997, + "step": 908 + }, + { + "epoch": 0.15, + "learning_rate": 0.000964869013518541, + "loss": 3.5119, + "step": 909 + }, + { + "epoch": 0.15, + "learning_rate": 0.000964772790307839, + "loss": 3.443, + "step": 910 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009646764403117598, + "loss": 3.4665, + "step": 911 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009645799635565869, + "loss": 3.4322, + "step": 912 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009644833600686378, + "loss": 3.3975, + "step": 913 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009643866298742654, + "loss": 3.3586, + "step": 914 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009642897729998563, + "loss": 3.4003, + "step": 915 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009641927894718325, + "loss": 3.4892, + "step": 916 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009640956793166501, + "loss": 3.5207, + "step": 917 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009639984425607995, + "loss": 3.4757, + "step": 918 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963901079230806, + "loss": 3.5099, + "step": 919 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638035893532297, + "loss": 3.4433, + "step": 920 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009637059729546645, + "loss": 3.4504, + "step": 921 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963608230061739, + "loss": 3.4371, + "step": 922 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963510360701117, + "loss": 3.4395, + "step": 923 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009634123648994959, + "loss": 3.4227, + "step": 924 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009633142426836081, + "loss": 3.4331, + "step": 925 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009632159940802205, + "loss": 3.4253, + "step": 926 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009631176191161341, + "loss": 3.5397, + "step": 927 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009630191178181848, + "loss": 3.4202, + "step": 928 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009629204902132425, + "loss": 3.4039, + "step": 929 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628217363282123, + "loss": 3.3886, + "step": 930 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009627228561900328, + "loss": 3.4761, + "step": 931 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009626238498256776, + "loss": 3.4573, + "step": 932 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009625247172621549, + "loss": 3.4066, + "step": 933 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624254585265068, + "loss": 3.4431, + "step": 934 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009623260736458104, + "loss": 3.3837, + "step": 935 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009622265626471766, + "loss": 3.3437, + "step": 936 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621269255577512, + "loss": 3.462, + "step": 937 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009620271624047141, + "loss": 3.589, + "step": 938 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009619272732152796, + "loss": 3.4145, + "step": 939 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009618272580166967, + "loss": 3.4257, + "step": 940 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009617271168362485, + "loss": 3.5116, + "step": 941 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009616268497012523, + "loss": 3.4112, + "step": 942 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009615264566390602, + "loss": 3.2934, + "step": 943 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614259376770584, + "loss": 3.4571, + "step": 944 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009613252928426674, + "loss": 3.3936, + "step": 945 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961224522163342, + "loss": 3.4374, + "step": 946 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009611236256665718, + "loss": 3.5991, + "step": 947 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009610226033798799, + "loss": 3.5437, + "step": 948 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009609214553308246, + "loss": 3.305, + "step": 949 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009608201815469977, + "loss": 3.2431, + "step": 950 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607187820560258, + "loss": 3.3745, + "step": 951 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009606172568855698, + "loss": 3.52, + "step": 952 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009605156060633246, + "loss": 3.3794, + "step": 953 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009604138296170197, + "loss": 3.5553, + "step": 954 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603119275744188, + "loss": 3.3365, + "step": 955 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009602098999633192, + "loss": 3.2153, + "step": 956 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009601077468115535, + "loss": 3.3288, + "step": 957 + }, + { + "epoch": 0.15, + "learning_rate": 0.000960005468146988, + "loss": 3.4794, + "step": 958 + }, + { + "epoch": 0.15, + "learning_rate": 0.000959903063997523, + "loss": 3.4947, + "step": 959 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009598005343910938, + "loss": 3.4697, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009596978793556693, + "loss": 3.5168, + "step": 961 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009595950989192524, + "loss": 3.3613, + "step": 962 + }, + { + "epoch": 0.16, + "learning_rate": 0.000959492193109881, + "loss": 3.476, + "step": 963 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009593891619556265, + "loss": 3.4309, + "step": 964 + }, + { + "epoch": 0.16, + "learning_rate": 0.000959286005484595, + "loss": 3.1764, + "step": 965 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009591827237249264, + "loss": 3.3749, + "step": 966 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009590793167047951, + "loss": 3.4264, + "step": 967 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009589757844524094, + "loss": 3.5059, + "step": 968 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009588721269960118, + "loss": 3.3735, + "step": 969 + }, + { + "epoch": 0.16, + "learning_rate": 0.000958768344363879, + "loss": 3.3721, + "step": 970 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009586644365843221, + "loss": 3.4304, + "step": 971 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009585604036856859, + "loss": 3.2022, + "step": 972 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009584562456963496, + "loss": 3.3411, + "step": 973 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583519626447265, + "loss": 3.5984, + "step": 974 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009582475545592637, + "loss": 3.4496, + "step": 975 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009581430214684431, + "loss": 3.4124, + "step": 976 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009580383634007801, + "loss": 3.3729, + "step": 977 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009579335803848244, + "loss": 3.4016, + "step": 978 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578286724491595, + "loss": 3.4017, + "step": 979 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009577236396224036, + "loss": 3.3784, + "step": 980 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009576184819332085, + "loss": 3.5411, + "step": 981 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009575131994102601, + "loss": 3.3887, + "step": 982 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009574077920822784, + "loss": 3.2414, + "step": 983 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009573022599780177, + "loss": 3.4576, + "step": 984 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009571966031262657, + "loss": 3.4938, + "step": 985 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009570908215558449, + "loss": 3.483, + "step": 986 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009569849152956114, + "loss": 3.3474, + "step": 987 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009568788843744552, + "loss": 3.3386, + "step": 988 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009567727288213005, + "loss": 3.4209, + "step": 989 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009566664486651055, + "loss": 3.6608, + "step": 990 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009565600439348625, + "loss": 3.5509, + "step": 991 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009564535146595976, + "loss": 3.2398, + "step": 992 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009563468608683709, + "loss": 3.3412, + "step": 993 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009562400825902764, + "loss": 3.4999, + "step": 994 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009561331798544422, + "loss": 3.5068, + "step": 995 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009560261526900303, + "loss": 3.4056, + "step": 996 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009559190011262369, + "loss": 3.4596, + "step": 997 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009558117251922913, + "loss": 3.283, + "step": 998 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009557043249174578, + "loss": 3.4376, + "step": 999 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955596800331034, + "loss": 3.3436, + "step": 1000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009554891514623514, + "loss": 3.362, + "step": 1001 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009553813783407756, + "loss": 3.5383, + "step": 1002 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009552734809957062, + "loss": 3.4304, + "step": 1003 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009551654594565763, + "loss": 3.378, + "step": 1004 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009550573137528532, + "loss": 3.2956, + "step": 1005 + }, + { + "epoch": 0.16, + "learning_rate": 0.000954949043914038, + "loss": 3.4008, + "step": 1006 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009548406499696653, + "loss": 3.1183, + "step": 1007 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009547321319493045, + "loss": 3.4021, + "step": 1008 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009546234898825578, + "loss": 3.4357, + "step": 1009 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009545147237990617, + "loss": 3.4669, + "step": 1010 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009544058337284867, + "loss": 3.4969, + "step": 1011 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009542968197005366, + "loss": 3.3618, + "step": 1012 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009541876817449498, + "loss": 3.481, + "step": 1013 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009540784198914976, + "loss": 3.3194, + "step": 1014 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009539690341699857, + "loss": 3.5061, + "step": 1015 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009538595246102535, + "loss": 3.3064, + "step": 1016 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009537498912421741, + "loss": 3.4014, + "step": 1017 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009536401340956542, + "loss": 3.5274, + "step": 1018 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009535302532006348, + "loss": 3.4335, + "step": 1019 + }, + { + "epoch": 0.16, + "learning_rate": 0.00095342024858709, + "loss": 3.3156, + "step": 1020 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009533101202850282, + "loss": 3.3732, + "step": 1021 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009531998683244911, + "loss": 3.3095, + "step": 1022 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009530894927355545, + "loss": 3.4049, + "step": 1023 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009529789935483275, + "loss": 3.533, + "step": 1024 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009528683707929535, + "loss": 3.4015, + "step": 1025 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952757624499609, + "loss": 3.561, + "step": 1026 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009526467546985048, + "loss": 3.392, + "step": 1027 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009525357614198848, + "loss": 3.357, + "step": 1028 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952424644694027, + "loss": 3.5334, + "step": 1029 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009523134045512429, + "loss": 3.3527, + "step": 1030 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009522020410218775, + "loss": 3.3857, + "step": 1031 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009520905541363099, + "loss": 3.3311, + "step": 1032 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009519789439249527, + "loss": 3.4424, + "step": 1033 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009518672104182517, + "loss": 3.4414, + "step": 1034 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009517553536466872, + "loss": 3.5108, + "step": 1035 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951643373640772, + "loss": 3.3373, + "step": 1036 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009515312704310535, + "loss": 3.4415, + "step": 1037 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009514190440481123, + "loss": 3.397, + "step": 1038 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009513066945225626, + "loss": 3.3632, + "step": 1039 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009511942218850523, + "loss": 3.4813, + "step": 1040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009510816261662627, + "loss": 3.3691, + "step": 1041 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009509689073969088, + "loss": 3.296, + "step": 1042 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009508560656077392, + "loss": 3.394, + "step": 1043 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950743100829536, + "loss": 3.426, + "step": 1044 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009506300130931149, + "loss": 3.5267, + "step": 1045 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009505168024293249, + "loss": 3.4061, + "step": 1046 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950403468869049, + "loss": 3.4177, + "step": 1047 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009502900124432035, + "loss": 3.383, + "step": 1048 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009501764331827378, + "loss": 3.3181, + "step": 1049 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009500627311186356, + "loss": 3.3809, + "step": 1050 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009499489062819136, + "loss": 3.4403, + "step": 1051 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009498349587036219, + "loss": 3.4243, + "step": 1052 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009497208884148444, + "loss": 3.3877, + "step": 1053 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009496066954466983, + "loss": 3.4378, + "step": 1054 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009494923798303343, + "loss": 3.4132, + "step": 1055 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009493779415969365, + "loss": 3.4078, + "step": 1056 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009492633807777227, + "loss": 3.5369, + "step": 1057 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009491486974039438, + "loss": 3.4259, + "step": 1058 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009490338915068841, + "loss": 3.5392, + "step": 1059 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009489189631178618, + "loss": 3.4965, + "step": 1060 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009488039122682283, + "loss": 3.4576, + "step": 1061 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009486887389893679, + "loss": 3.3752, + "step": 1062 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009485734433126991, + "loss": 3.4839, + "step": 1063 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009484580252696733, + "loss": 3.4843, + "step": 1064 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009483424848917753, + "loss": 3.3902, + "step": 1065 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009482268222105234, + "loss": 3.3114, + "step": 1066 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009481110372574693, + "loss": 3.4135, + "step": 1067 + }, + { + "epoch": 0.17, + "learning_rate": 0.000947995130064198, + "loss": 3.4888, + "step": 1068 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009478791006623276, + "loss": 3.4122, + "step": 1069 + }, + { + "epoch": 0.17, + "learning_rate": 0.00094776294908351, + "loss": 3.3093, + "step": 1070 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009476466753594302, + "loss": 3.3862, + "step": 1071 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009475302795218061, + "loss": 3.423, + "step": 1072 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009474137616023899, + "loss": 3.4152, + "step": 1073 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009472971216329662, + "loss": 3.3902, + "step": 1074 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009471803596453535, + "loss": 3.2852, + "step": 1075 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009470634756714027, + "loss": 3.3719, + "step": 1076 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009469464697429992, + "loss": 3.3613, + "step": 1077 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009468293418920608, + "loss": 3.4722, + "step": 1078 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009467120921505388, + "loss": 3.3449, + "step": 1079 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009465947205504178, + "loss": 3.2864, + "step": 1080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009464772271237155, + "loss": 3.4195, + "step": 1081 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009463596119024831, + "loss": 3.3884, + "step": 1082 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009462418749188048, + "loss": 3.5345, + "step": 1083 + }, + { + "epoch": 0.17, + "learning_rate": 0.000946124016204798, + "loss": 3.4601, + "step": 1084 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009460060357926134, + "loss": 3.3562, + "step": 1085 + }, + { + "epoch": 0.18, + "learning_rate": 0.000945887933714435, + "loss": 3.2827, + "step": 1086 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009457697100024798, + "loss": 3.5575, + "step": 1087 + }, + { + "epoch": 0.18, + "learning_rate": 0.000945651364688998, + "loss": 3.3943, + "step": 1088 + }, + { + "epoch": 0.18, + "learning_rate": 0.000945532897806273, + "loss": 3.4887, + "step": 1089 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009454143093866216, + "loss": 3.4859, + "step": 1090 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009452955994623932, + "loss": 3.3214, + "step": 1091 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451767680659709, + "loss": 3.287, + "step": 1092 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009450578152297706, + "loss": 3.4778, + "step": 1093 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009449387409862415, + "loss": 3.3501, + "step": 1094 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009448195453678661, + "loss": 3.3924, + "step": 1095 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009447002284071593, + "loss": 3.4862, + "step": 1096 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009445807901366699, + "loss": 3.4371, + "step": 1097 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009444612305889795, + "loss": 3.5001, + "step": 1098 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009443415497967026, + "loss": 3.3799, + "step": 1099 + }, + { + "epoch": 0.18, + "learning_rate": 0.000944221747792487, + "loss": 3.428, + "step": 1100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009441018246090134, + "loss": 3.4268, + "step": 1101 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009439817802789957, + "loss": 3.4337, + "step": 1102 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009438616148351809, + "loss": 3.3007, + "step": 1103 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009437413283103486, + "loss": 3.3873, + "step": 1104 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009436209207373123, + "loss": 3.4679, + "step": 1105 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009435003921489176, + "loss": 3.4424, + "step": 1106 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009433797425780435, + "loss": 3.4173, + "step": 1107 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009432589720576021, + "loss": 3.433, + "step": 1108 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009431380806205385, + "loss": 3.4495, + "step": 1109 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009430170682998305, + "loss": 3.5405, + "step": 1110 + }, + { + "epoch": 0.18, + "learning_rate": 0.000942895935128489, + "loss": 3.3693, + "step": 1111 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009427746811395581, + "loss": 3.3385, + "step": 1112 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009426533063661147, + "loss": 3.3986, + "step": 1113 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009425318108412684, + "loss": 3.4789, + "step": 1114 + }, + { + "epoch": 0.18, + "learning_rate": 0.000942410194598162, + "loss": 3.4064, + "step": 1115 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009422884576699715, + "loss": 3.4204, + "step": 1116 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009421666000899052, + "loss": 3.3817, + "step": 1117 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009420446218912047, + "loss": 3.3647, + "step": 1118 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009419225231071446, + "loss": 3.2934, + "step": 1119 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009418003037710321, + "loss": 3.4512, + "step": 1120 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009416779639162072, + "loss": 3.2144, + "step": 1121 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009415555035760434, + "loss": 3.4466, + "step": 1122 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009414329227839464, + "loss": 3.5094, + "step": 1123 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009413102215733553, + "loss": 3.4368, + "step": 1124 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009411873999777414, + "loss": 3.352, + "step": 1125 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009410644580306092, + "loss": 3.2927, + "step": 1126 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009409413957654964, + "loss": 3.3452, + "step": 1127 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009408182132159728, + "loss": 3.4336, + "step": 1128 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009406949104156417, + "loss": 3.3229, + "step": 1129 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009405714873981386, + "loss": 3.4584, + "step": 1130 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009404479441971321, + "loss": 3.5503, + "step": 1131 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009403242808463236, + "loss": 3.3838, + "step": 1132 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009402004973794474, + "loss": 3.3587, + "step": 1133 + }, + { + "epoch": 0.18, + "learning_rate": 0.00094007659383027, + "loss": 3.3125, + "step": 1134 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009399525702325915, + "loss": 3.3735, + "step": 1135 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009398284266202439, + "loss": 3.514, + "step": 1136 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009397041630270926, + "loss": 3.4996, + "step": 1137 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009395797794870354, + "loss": 3.328, + "step": 1138 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009394552760340029, + "loss": 3.4831, + "step": 1139 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009393306527019584, + "loss": 3.3892, + "step": 1140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009392059095248978, + "loss": 3.3798, + "step": 1141 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009390810465368499, + "loss": 3.3717, + "step": 1142 + }, + { + "epoch": 0.18, + "learning_rate": 0.000938956063771876, + "loss": 3.3771, + "step": 1143 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009388309612640703, + "loss": 3.476, + "step": 1144 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009387057390475593, + "loss": 3.3482, + "step": 1145 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009385803971565025, + "loss": 3.343, + "step": 1146 + }, + { + "epoch": 0.18, + "learning_rate": 0.000938454935625092, + "loss": 3.3992, + "step": 1147 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009383293544875523, + "loss": 3.2775, + "step": 1148 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009382036537781408, + "loss": 3.4508, + "step": 1149 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009380778335311472, + "loss": 3.3439, + "step": 1150 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009379518937808941, + "loss": 3.452, + "step": 1151 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009378258345617368, + "loss": 3.3057, + "step": 1152 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009376996559080628, + "loss": 3.6207, + "step": 1153 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009375733578542925, + "loss": 3.4744, + "step": 1154 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009374469404348784, + "loss": 3.3499, + "step": 1155 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009373204036843064, + "loss": 3.2892, + "step": 1156 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009371937476370942, + "loss": 3.4037, + "step": 1157 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009370669723277922, + "loss": 3.1885, + "step": 1158 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009369400777909838, + "loss": 3.2903, + "step": 1159 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009368130640612842, + "loss": 3.4753, + "step": 1160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009366859311733418, + "loss": 3.3379, + "step": 1161 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009365586791618368, + "loss": 3.2838, + "step": 1162 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009364313080614826, + "loss": 3.4207, + "step": 1163 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009363038179070245, + "loss": 3.4738, + "step": 1164 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009361762087332408, + "loss": 3.3545, + "step": 1165 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009360484805749418, + "loss": 3.269, + "step": 1166 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009359206334669707, + "loss": 3.2842, + "step": 1167 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009357926674442027, + "loss": 3.2509, + "step": 1168 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009356645825415459, + "loss": 3.4129, + "step": 1169 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009355363787939404, + "loss": 3.5417, + "step": 1170 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009354080562363588, + "loss": 3.4689, + "step": 1171 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009352796149038063, + "loss": 3.3206, + "step": 1172 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009351510548313204, + "loss": 3.3774, + "step": 1173 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009350223760539713, + "loss": 3.4419, + "step": 1174 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009348935786068608, + "loss": 3.453, + "step": 1175 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009347646625251238, + "loss": 3.3074, + "step": 1176 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009346356278439274, + "loss": 3.4048, + "step": 1177 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009345064745984709, + "loss": 3.3332, + "step": 1178 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009343772028239858, + "loss": 3.4451, + "step": 1179 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009342478125557366, + "loss": 3.3869, + "step": 1180 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009341183038290193, + "loss": 3.3183, + "step": 1181 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009339886766791628, + "loss": 3.4331, + "step": 1182 + }, + { + "epoch": 0.19, + "learning_rate": 0.000933858931141528, + "loss": 3.3845, + "step": 1183 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009337290672515081, + "loss": 3.299, + "step": 1184 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009335990850445288, + "loss": 3.5026, + "step": 1185 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009334689845560479, + "loss": 3.434, + "step": 1186 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009333387658215555, + "loss": 3.3191, + "step": 1187 + }, + { + "epoch": 0.19, + "learning_rate": 0.000933208428876574, + "loss": 3.3561, + "step": 1188 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009330779737566581, + "loss": 3.2574, + "step": 1189 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009329474004973945, + "loss": 3.2602, + "step": 1190 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009328167091344024, + "loss": 3.2468, + "step": 1191 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009326858997033329, + "loss": 3.4466, + "step": 1192 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009325549722398698, + "loss": 3.3555, + "step": 1193 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009324239267797287, + "loss": 3.2234, + "step": 1194 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009322927633586575, + "loss": 3.3295, + "step": 1195 + }, + { + "epoch": 0.19, + "learning_rate": 0.000932161482012436, + "loss": 3.272, + "step": 1196 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009320300827768769, + "loss": 3.4187, + "step": 1197 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009318985656878243, + "loss": 3.4064, + "step": 1198 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009317669307811547, + "loss": 3.3096, + "step": 1199 + }, + { + "epoch": 0.19, + "learning_rate": 0.000931635178092777, + "loss": 3.393, + "step": 1200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009315033076586319, + "loss": 3.2821, + "step": 1201 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009313713195146923, + "loss": 3.3809, + "step": 1202 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009312392136969634, + "loss": 3.3547, + "step": 1203 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009311069902414821, + "loss": 3.5618, + "step": 1204 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009309746491843181, + "loss": 3.3222, + "step": 1205 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009308421905615722, + "loss": 3.3861, + "step": 1206 + }, + { + "epoch": 0.19, + "learning_rate": 0.000930709614409378, + "loss": 3.5047, + "step": 1207 + }, + { + "epoch": 0.19, + "learning_rate": 0.000930576920763901, + "loss": 3.4651, + "step": 1208 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009304441096613386, + "loss": 3.4263, + "step": 1209 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009303111811379206, + "loss": 3.3901, + "step": 1210 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009301781352299081, + "loss": 3.3603, + "step": 1211 + }, + { + "epoch": 0.2, + "learning_rate": 0.000930044971973595, + "loss": 3.3481, + "step": 1212 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009299116914053071, + "loss": 3.2465, + "step": 1213 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009297782935614017, + "loss": 3.4738, + "step": 1214 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009296447784782684, + "loss": 3.3136, + "step": 1215 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009295111461923289, + "loss": 3.2506, + "step": 1216 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009293773967400368, + "loss": 3.2414, + "step": 1217 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009292435301578773, + "loss": 3.3469, + "step": 1218 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009291095464823683, + "loss": 3.3221, + "step": 1219 + }, + { + "epoch": 0.2, + "learning_rate": 0.000928975445750059, + "loss": 3.357, + "step": 1220 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009288412279975307, + "loss": 3.3918, + "step": 1221 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009287068932613967, + "loss": 3.4859, + "step": 1222 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009285724415783023, + "loss": 3.4007, + "step": 1223 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009284378729849243, + "loss": 3.2551, + "step": 1224 + }, + { + "epoch": 0.2, + "learning_rate": 0.000928303187517972, + "loss": 3.3322, + "step": 1225 + }, + { + "epoch": 0.2, + "learning_rate": 0.000928168385214186, + "loss": 3.2775, + "step": 1226 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009280334661103391, + "loss": 3.4409, + "step": 1227 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009278984302432358, + "loss": 3.3964, + "step": 1228 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009277632776497129, + "loss": 3.3164, + "step": 1229 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009276280083666382, + "loss": 3.3247, + "step": 1230 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009274926224309121, + "loss": 3.2771, + "step": 1231 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009273571198794663, + "loss": 3.3696, + "step": 1232 + }, + { + "epoch": 0.2, + "learning_rate": 0.000927221500749265, + "loss": 3.2238, + "step": 1233 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009270857650773032, + "loss": 3.2579, + "step": 1234 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009269499129006085, + "loss": 3.4019, + "step": 1235 + }, + { + "epoch": 0.2, + "learning_rate": 0.00092681394425624, + "loss": 3.4263, + "step": 1236 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009266778591812885, + "loss": 3.4831, + "step": 1237 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009265416577128769, + "loss": 3.5093, + "step": 1238 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009264053398881593, + "loss": 3.4835, + "step": 1239 + }, + { + "epoch": 0.2, + "learning_rate": 0.000926268905744322, + "loss": 3.473, + "step": 1240 + }, + { + "epoch": 0.2, + "learning_rate": 0.000926132355318583, + "loss": 3.3827, + "step": 1241 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009259956886481916, + "loss": 3.4095, + "step": 1242 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009258589057704291, + "loss": 3.2897, + "step": 1243 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009257220067226088, + "loss": 3.4457, + "step": 1244 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009255849915420754, + "loss": 3.3444, + "step": 1245 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009254478602662049, + "loss": 3.345, + "step": 1246 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009253106129324056, + "loss": 3.2801, + "step": 1247 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009251732495781171, + "loss": 3.4944, + "step": 1248 + }, + { + "epoch": 0.2, + "learning_rate": 0.000925035770240811, + "loss": 3.3267, + "step": 1249 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009248981749579899, + "loss": 3.4009, + "step": 1250 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009247604637671887, + "loss": 3.4202, + "step": 1251 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009246226367059736, + "loss": 3.2489, + "step": 1252 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009244846938119422, + "loss": 3.4814, + "step": 1253 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009243466351227243, + "loss": 3.416, + "step": 1254 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009242084606759809, + "loss": 3.3531, + "step": 1255 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009240701705094044, + "loss": 3.3162, + "step": 1256 + }, + { + "epoch": 0.2, + "learning_rate": 0.000923931764660719, + "loss": 3.2706, + "step": 1257 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009237932431676806, + "loss": 3.3022, + "step": 1258 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009236546060680761, + "loss": 3.3965, + "step": 1259 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009235158533997249, + "loss": 3.4596, + "step": 1260 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009233769852004769, + "loss": 3.3808, + "step": 1261 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009232380015082143, + "loss": 3.3786, + "step": 1262 + }, + { + "epoch": 0.2, + "learning_rate": 0.00092309890236085, + "loss": 3.4302, + "step": 1263 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009229596877963292, + "loss": 3.3764, + "step": 1264 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009228203578526281, + "loss": 3.4333, + "step": 1265 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009226809125677548, + "loss": 3.5228, + "step": 1266 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009225413519797482, + "loss": 3.2941, + "step": 1267 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009224016761266793, + "loss": 3.3698, + "step": 1268 + }, + { + "epoch": 0.2, + "learning_rate": 0.00092226188504665, + "loss": 3.3965, + "step": 1269 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009221219787777942, + "loss": 3.3558, + "step": 1270 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009219819573582768, + "loss": 3.5112, + "step": 1271 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009218418208262944, + "loss": 3.3175, + "step": 1272 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009217015692200745, + "loss": 3.3449, + "step": 1273 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009215612025778767, + "loss": 3.3651, + "step": 1274 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009214207209379914, + "loss": 3.4455, + "step": 1275 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009212801243387406, + "loss": 3.4602, + "step": 1276 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009211394128184778, + "loss": 3.5385, + "step": 1277 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009209985864155877, + "loss": 3.2499, + "step": 1278 + }, + { + "epoch": 0.21, + "learning_rate": 0.000920857645168486, + "loss": 3.4178, + "step": 1279 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009207165891156205, + "loss": 3.452, + "step": 1280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009205754182954696, + "loss": 3.3734, + "step": 1281 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009204341327465434, + "loss": 3.4437, + "step": 1282 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009202927325073832, + "loss": 3.4694, + "step": 1283 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009201512176165615, + "loss": 3.4712, + "step": 1284 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009200095881126822, + "loss": 3.3471, + "step": 1285 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009198678440343804, + "loss": 3.4713, + "step": 1286 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009197259854203226, + "loss": 3.3782, + "step": 1287 + }, + { + "epoch": 0.21, + "learning_rate": 0.000919584012309206, + "loss": 3.4582, + "step": 1288 + }, + { + "epoch": 0.21, + "learning_rate": 0.00091944192473976, + "loss": 3.4302, + "step": 1289 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009192997227507443, + "loss": 3.3561, + "step": 1290 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009191574063809504, + "loss": 3.41, + "step": 1291 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009190149756692006, + "loss": 3.3291, + "step": 1292 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009188724306543486, + "loss": 3.4251, + "step": 1293 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009187297713752794, + "loss": 3.4574, + "step": 1294 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009185869978709087, + "loss": 3.2576, + "step": 1295 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009184441101801841, + "loss": 3.5057, + "step": 1296 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009183011083420838, + "loss": 3.2183, + "step": 1297 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009181579923956171, + "loss": 3.336, + "step": 1298 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009180147623798249, + "loss": 3.3807, + "step": 1299 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009178714183337787, + "loss": 3.3969, + "step": 1300 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009177279602965813, + "loss": 3.4602, + "step": 1301 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009175843883073667, + "loss": 3.2714, + "step": 1302 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009174407024053, + "loss": 3.3338, + "step": 1303 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009172969026295769, + "loss": 3.175, + "step": 1304 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009171529890194252, + "loss": 3.3754, + "step": 1305 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009170089616141026, + "loss": 3.4411, + "step": 1306 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009168648204528983, + "loss": 3.258, + "step": 1307 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009167205655751329, + "loss": 3.2787, + "step": 1308 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009165761970201574, + "loss": 3.401, + "step": 1309 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009164317148273543, + "loss": 3.4217, + "step": 1310 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009162871190361369, + "loss": 3.3705, + "step": 1311 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009161424096859492, + "loss": 3.4489, + "step": 1312 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009159975868162668, + "loss": 3.3931, + "step": 1313 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009158526504665957, + "loss": 3.2986, + "step": 1314 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009157076006764733, + "loss": 3.3457, + "step": 1315 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009155624374854676, + "loss": 3.4797, + "step": 1316 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009154171609331778, + "loss": 3.3956, + "step": 1317 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009152717710592336, + "loss": 3.3376, + "step": 1318 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009151262679032963, + "loss": 3.3388, + "step": 1319 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009149806515050573, + "loss": 3.4063, + "step": 1320 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009148349219042396, + "loss": 3.5079, + "step": 1321 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009146890791405966, + "loss": 3.2323, + "step": 1322 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914543123253913, + "loss": 3.3223, + "step": 1323 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009143970542840039, + "loss": 3.3247, + "step": 1324 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009142508722707154, + "loss": 3.4344, + "step": 1325 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009141045772539247, + "loss": 3.4103, + "step": 1326 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009139581692735395, + "loss": 3.2872, + "step": 1327 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009138116483694984, + "loss": 3.3249, + "step": 1328 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009136650145817708, + "loss": 3.3037, + "step": 1329 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009135182679503573, + "loss": 3.2295, + "step": 1330 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009133714085152884, + "loss": 3.2756, + "step": 1331 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009132244363166262, + "loss": 3.2556, + "step": 1332 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009130773513944632, + "loss": 3.4014, + "step": 1333 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009129301537889227, + "loss": 3.2324, + "step": 1334 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009127828435401587, + "loss": 3.2639, + "step": 1335 + }, + { + "epoch": 0.22, + "learning_rate": 0.000912635420688356, + "loss": 3.5575, + "step": 1336 + }, + { + "epoch": 0.22, + "learning_rate": 0.00091248788527373, + "loss": 3.5006, + "step": 1337 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009123402373365269, + "loss": 3.4526, + "step": 1338 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009121924769170237, + "loss": 3.3776, + "step": 1339 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009120446040555279, + "loss": 3.4802, + "step": 1340 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009118966187923777, + "loss": 3.4072, + "step": 1341 + }, + { + "epoch": 0.22, + "learning_rate": 0.000911748521167942, + "loss": 3.5645, + "step": 1342 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009116003112226205, + "loss": 3.3619, + "step": 1343 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009114519889968434, + "loss": 3.3224, + "step": 1344 + }, + { + "epoch": 0.22, + "learning_rate": 0.000911303554531071, + "loss": 3.249, + "step": 1345 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009111550078657956, + "loss": 3.1902, + "step": 1346 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009110063490415385, + "loss": 3.4587, + "step": 1347 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009108575780988527, + "loss": 3.3273, + "step": 1348 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009107086950783215, + "loss": 3.3583, + "step": 1349 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009105597000205585, + "loss": 3.4856, + "step": 1350 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009104105929662081, + "loss": 3.3383, + "step": 1351 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009102613739559452, + "loss": 3.4071, + "step": 1352 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009101120430304755, + "loss": 3.3558, + "step": 1353 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009099626002305346, + "loss": 3.4049, + "step": 1354 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009098130455968892, + "loss": 3.3784, + "step": 1355 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009096633791703364, + "loss": 3.2711, + "step": 1356 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009095136009917036, + "loss": 3.3998, + "step": 1357 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009093637111018487, + "loss": 3.3993, + "step": 1358 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009092137095416604, + "loss": 3.5481, + "step": 1359 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009090635963520575, + "loss": 3.4415, + "step": 1360 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009089133715739893, + "loss": 3.3139, + "step": 1361 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009087630352484357, + "loss": 3.2524, + "step": 1362 + }, + { + "epoch": 0.22, + "learning_rate": 0.000908612587416407, + "loss": 3.2122, + "step": 1363 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009084620281189438, + "loss": 3.4643, + "step": 1364 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009083113573971171, + "loss": 3.4022, + "step": 1365 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009081605752920285, + "loss": 3.3301, + "step": 1366 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090800968184481, + "loss": 3.4332, + "step": 1367 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009078586770966236, + "loss": 3.269, + "step": 1368 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009077075610886616, + "loss": 3.3823, + "step": 1369 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009075563338621476, + "loss": 3.2805, + "step": 1370 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009074049954583344, + "loss": 3.2919, + "step": 1371 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009072535459185056, + "loss": 3.2892, + "step": 1372 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009071019852839755, + "loss": 3.3164, + "step": 1373 + }, + { + "epoch": 0.22, + "learning_rate": 0.000906950313596088, + "loss": 3.4243, + "step": 1374 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009067985308962176, + "loss": 3.5045, + "step": 1375 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009066466372257694, + "loss": 3.3844, + "step": 1376 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009064946326261784, + "loss": 3.3357, + "step": 1377 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009063425171389097, + "loss": 3.3924, + "step": 1378 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009061902908054589, + "loss": 3.3363, + "step": 1379 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009060379536673521, + "loss": 3.1473, + "step": 1380 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009058855057661452, + "loss": 3.3926, + "step": 1381 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009057329471434244, + "loss": 3.3823, + "step": 1382 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009055802778408063, + "loss": 3.379, + "step": 1383 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009054274978999373, + "loss": 3.3205, + "step": 1384 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009052746073624947, + "loss": 3.4581, + "step": 1385 + }, + { + "epoch": 0.22, + "learning_rate": 0.000905121606270185, + "loss": 3.4159, + "step": 1386 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009049684946647458, + "loss": 3.4125, + "step": 1387 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009048152725879442, + "loss": 3.3618, + "step": 1388 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009046619400815777, + "loss": 3.2907, + "step": 1389 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009045084971874737, + "loss": 3.341, + "step": 1390 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009043549439474903, + "loss": 3.3855, + "step": 1391 + }, + { + "epoch": 0.22, + "learning_rate": 0.000904201280403515, + "loss": 3.4016, + "step": 1392 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009040475065974656, + "loss": 3.3087, + "step": 1393 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009038936225712901, + "loss": 3.3458, + "step": 1394 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009037396283669667, + "loss": 3.3492, + "step": 1395 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009035855240265037, + "loss": 3.2055, + "step": 1396 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009034313095919386, + "loss": 3.2368, + "step": 1397 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009032769851053399, + "loss": 3.5413, + "step": 1398 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009031225506088057, + "loss": 3.3277, + "step": 1399 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009029680061444645, + "loss": 3.4812, + "step": 1400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009028133517544741, + "loss": 3.4095, + "step": 1401 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009026585874810227, + "loss": 3.3131, + "step": 1402 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009025037133663287, + "loss": 3.3154, + "step": 1403 + }, + { + "epoch": 0.23, + "learning_rate": 0.00090234872945264, + "loss": 3.3992, + "step": 1404 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009021936357822347, + "loss": 3.3944, + "step": 1405 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009020384323974209, + "loss": 3.4425, + "step": 1406 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009018831193405365, + "loss": 3.2933, + "step": 1407 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009017276966539491, + "loss": 3.3987, + "step": 1408 + }, + { + "epoch": 0.23, + "learning_rate": 0.000901572164380057, + "loss": 3.4202, + "step": 1409 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009014165225612874, + "loss": 3.3525, + "step": 1410 + }, + { + "epoch": 0.23, + "learning_rate": 0.000901260771240098, + "loss": 3.3778, + "step": 1411 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009011049104589759, + "loss": 3.3594, + "step": 1412 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009009489402604389, + "loss": 3.2807, + "step": 1413 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009007928606870339, + "loss": 3.4698, + "step": 1414 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009006366717813377, + "loss": 3.321, + "step": 1415 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009004803735859571, + "loss": 3.4248, + "step": 1416 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009003239661435289, + "loss": 3.4691, + "step": 1417 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009001674494967191, + "loss": 3.2513, + "step": 1418 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009000108236882245, + "loss": 3.3327, + "step": 1419 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008998540887607705, + "loss": 3.4059, + "step": 1420 + }, + { + "epoch": 0.23, + "learning_rate": 0.000899697244757113, + "loss": 3.3752, + "step": 1421 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008995402917200373, + "loss": 3.3514, + "step": 1422 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008993832296923591, + "loss": 3.3647, + "step": 1423 + }, + { + "epoch": 0.23, + "learning_rate": 0.000899226058716923, + "loss": 3.2287, + "step": 1424 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008990687788366037, + "loss": 3.3707, + "step": 1425 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008989113900943056, + "loss": 3.3156, + "step": 1426 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008987538925329628, + "loss": 3.4474, + "step": 1427 + }, + { + "epoch": 0.23, + "learning_rate": 0.000898596286195539, + "loss": 3.3224, + "step": 1428 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008984385711250277, + "loss": 3.4184, + "step": 1429 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008982807473644521, + "loss": 3.3285, + "step": 1430 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008981228149568647, + "loss": 3.348, + "step": 1431 + }, + { + "epoch": 0.23, + "learning_rate": 0.000897964773945348, + "loss": 3.3918, + "step": 1432 + }, + { + "epoch": 0.23, + "learning_rate": 0.000897806624373014, + "loss": 3.3156, + "step": 1433 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008976483662830046, + "loss": 3.4797, + "step": 1434 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008974899997184906, + "loss": 3.3313, + "step": 1435 + }, + { + "epoch": 0.23, + "learning_rate": 0.000897331524722673, + "loss": 3.2881, + "step": 1436 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008971729413387822, + "loss": 3.3171, + "step": 1437 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008970142496100782, + "loss": 3.3463, + "step": 1438 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008968554495798505, + "loss": 3.1659, + "step": 1439 + }, + { + "epoch": 0.23, + "learning_rate": 0.000896696541291418, + "loss": 3.2594, + "step": 1440 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008965375247881296, + "loss": 3.3362, + "step": 1441 + }, + { + "epoch": 0.23, + "learning_rate": 0.000896378400113363, + "loss": 3.3815, + "step": 1442 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008962191673105263, + "loss": 3.3254, + "step": 1443 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008960598264230563, + "loss": 3.306, + "step": 1444 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008959003774944198, + "loss": 3.3453, + "step": 1445 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008957408205681128, + "loss": 3.4091, + "step": 1446 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008955811556876605, + "loss": 3.3559, + "step": 1447 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008954213828966185, + "loss": 3.4489, + "step": 1448 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008952615022385709, + "loss": 3.5211, + "step": 1449 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008951015137571314, + "loss": 3.338, + "step": 1450 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008949414174959434, + "loss": 3.4531, + "step": 1451 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008947812134986797, + "loss": 3.3413, + "step": 1452 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008946209018090422, + "loss": 3.2718, + "step": 1453 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008944604824707623, + "loss": 3.2736, + "step": 1454 + }, + { + "epoch": 0.23, + "learning_rate": 0.000894299955527601, + "loss": 3.3943, + "step": 1455 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008941393210233482, + "loss": 3.1605, + "step": 1456 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008939785790018235, + "loss": 3.3159, + "step": 1457 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008938177295068757, + "loss": 3.2299, + "step": 1458 + }, + { + "epoch": 0.24, + "learning_rate": 0.000893656772582383, + "loss": 3.3505, + "step": 1459 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008934957082722528, + "loss": 3.1961, + "step": 1460 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008933345366204218, + "loss": 3.5531, + "step": 1461 + }, + { + "epoch": 0.24, + "learning_rate": 0.000893173257670856, + "loss": 3.4106, + "step": 1462 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008930118714675508, + "loss": 3.5009, + "step": 1463 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008928503780545307, + "loss": 3.3692, + "step": 1464 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008926887774758493, + "loss": 3.3384, + "step": 1465 + }, + { + "epoch": 0.24, + "learning_rate": 0.00089252706977559, + "loss": 3.2305, + "step": 1466 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008923652549978647, + "loss": 3.3736, + "step": 1467 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008922033331868149, + "loss": 3.3896, + "step": 1468 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008920413043866116, + "loss": 3.3662, + "step": 1469 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008918791686414543, + "loss": 3.4063, + "step": 1470 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891716925995572, + "loss": 3.2955, + "step": 1471 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891554576493223, + "loss": 3.2443, + "step": 1472 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008913921201786947, + "loss": 3.3375, + "step": 1473 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008912295570963033, + "loss": 3.3762, + "step": 1474 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008910668872903946, + "loss": 3.4337, + "step": 1475 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008909041108053433, + "loss": 3.2725, + "step": 1476 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008907412276855532, + "loss": 3.3808, + "step": 1477 + }, + { + "epoch": 0.24, + "learning_rate": 0.000890578237975457, + "loss": 3.2695, + "step": 1478 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008904151417195167, + "loss": 3.2181, + "step": 1479 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008902519389622237, + "loss": 3.2118, + "step": 1480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008900886297480977, + "loss": 3.4683, + "step": 1481 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008899252141216879, + "loss": 3.3104, + "step": 1482 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008897616921275724, + "loss": 3.1985, + "step": 1483 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008895980638103587, + "loss": 3.3502, + "step": 1484 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008894343292146825, + "loss": 3.246, + "step": 1485 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008892704883852092, + "loss": 3.3688, + "step": 1486 + }, + { + "epoch": 0.24, + "learning_rate": 0.000889106541366633, + "loss": 3.2304, + "step": 1487 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008889424882036769, + "loss": 3.3946, + "step": 1488 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008887783289410931, + "loss": 3.3653, + "step": 1489 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008886140636236624, + "loss": 3.4106, + "step": 1490 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008884496922961949, + "loss": 3.2781, + "step": 1491 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008882852150035295, + "loss": 3.3197, + "step": 1492 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008881206317905337, + "loss": 3.3429, + "step": 1493 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008879559427021044, + "loss": 3.3133, + "step": 1494 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008877911477831671, + "loss": 3.3206, + "step": 1495 + }, + { + "epoch": 0.24, + "learning_rate": 0.000887626247078676, + "loss": 3.2705, + "step": 1496 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008874612406336147, + "loss": 3.2695, + "step": 1497 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008872961284929952, + "loss": 3.329, + "step": 1498 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008871309107018584, + "loss": 3.2673, + "step": 1499 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008869655873052738, + "loss": 3.315, + "step": 1500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008868001583483405, + "loss": 3.3725, + "step": 1501 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008866346238761855, + "loss": 3.2297, + "step": 1502 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008864689839339652, + "loss": 3.3058, + "step": 1503 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008863032385668641, + "loss": 3.4595, + "step": 1504 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008861373878200964, + "loss": 3.3501, + "step": 1505 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008859714317389041, + "loss": 3.3853, + "step": 1506 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008858053703685587, + "loss": 3.4266, + "step": 1507 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008856392037543599, + "loss": 3.2929, + "step": 1508 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008854729319416363, + "loss": 3.3024, + "step": 1509 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008853065549757451, + "loss": 3.302, + "step": 1510 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008851400729020726, + "loss": 3.2701, + "step": 1511 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008849734857660331, + "loss": 3.2209, + "step": 1512 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008848067936130701, + "loss": 3.2898, + "step": 1513 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008846399964886555, + "loss": 3.4593, + "step": 1514 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008844730944382899, + "loss": 3.1545, + "step": 1515 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008843060875075024, + "loss": 3.3224, + "step": 1516 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884138975741851, + "loss": 3.3049, + "step": 1517 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008839717591869221, + "loss": 3.3861, + "step": 1518 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008838044378883305, + "loss": 3.5483, + "step": 1519 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008836370118917201, + "loss": 3.2823, + "step": 1520 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008834694812427629, + "loss": 3.432, + "step": 1521 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008833018459871593, + "loss": 3.4038, + "step": 1522 + }, + { + "epoch": 0.25, + "learning_rate": 0.000883134106170639, + "loss": 3.3181, + "step": 1523 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008829662618389595, + "loss": 3.3551, + "step": 1524 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008827983130379072, + "loss": 3.233, + "step": 1525 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008826302598132964, + "loss": 3.4419, + "step": 1526 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882462102210971, + "loss": 3.3194, + "step": 1527 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008822938402768024, + "loss": 3.2747, + "step": 1528 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008821254740566906, + "loss": 3.2828, + "step": 1529 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008819570035965645, + "loss": 3.3737, + "step": 1530 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008817884289423812, + "loss": 3.2664, + "step": 1531 + }, + { + "epoch": 0.25, + "learning_rate": 0.000881619750140126, + "loss": 3.267, + "step": 1532 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008814509672358126, + "loss": 3.5011, + "step": 1533 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008812820802754837, + "loss": 3.4976, + "step": 1534 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008811130893052099, + "loss": 3.4919, + "step": 1535 + }, + { + "epoch": 0.25, + "learning_rate": 0.00088094399437109, + "loss": 3.2727, + "step": 1536 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008807747955192517, + "loss": 3.2272, + "step": 1537 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008806054927958506, + "loss": 3.2577, + "step": 1538 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008804360862470709, + "loss": 3.4307, + "step": 1539 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008802665759191247, + "loss": 3.4068, + "step": 1540 + }, + { + "epoch": 0.25, + "learning_rate": 0.000880096961858253, + "loss": 3.3839, + "step": 1541 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008799272441107249, + "loss": 3.428, + "step": 1542 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008797574227228375, + "loss": 3.3235, + "step": 1543 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008795874977409167, + "loss": 3.4361, + "step": 1544 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008794174692113158, + "loss": 3.3737, + "step": 1545 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008792473371804174, + "loss": 3.2859, + "step": 1546 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008790771016946315, + "loss": 3.3732, + "step": 1547 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008789067628003968, + "loss": 3.4734, + "step": 1548 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008787363205441799, + "loss": 3.2824, + "step": 1549 + }, + { + "epoch": 0.25, + "learning_rate": 0.000878565774972476, + "loss": 3.471, + "step": 1550 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008783951261318079, + "loss": 3.3632, + "step": 1551 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008782243740687272, + "loss": 3.2654, + "step": 1552 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008780535188298134, + "loss": 3.2931, + "step": 1553 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008778825604616737, + "loss": 3.3474, + "step": 1554 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008777114990109442, + "loss": 3.3624, + "step": 1555 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008775403345242885, + "loss": 3.2995, + "step": 1556 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008773690670483988, + "loss": 3.4203, + "step": 1557 + }, + { + "epoch": 0.25, + "learning_rate": 0.000877197696629995, + "loss": 3.3728, + "step": 1558 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008770262233158253, + "loss": 3.3482, + "step": 1559 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008768546471526659, + "loss": 3.1999, + "step": 1560 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008766829681873212, + "loss": 3.3516, + "step": 1561 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008765111864666231, + "loss": 3.3693, + "step": 1562 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008763393020374324, + "loss": 3.3647, + "step": 1563 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008761673149466373, + "loss": 3.376, + "step": 1564 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875995225241154, + "loss": 3.2895, + "step": 1565 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008758230329679271, + "loss": 3.2676, + "step": 1566 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008756507381739287, + "loss": 3.3312, + "step": 1567 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008754783409061593, + "loss": 3.2945, + "step": 1568 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875305841211647, + "loss": 3.3013, + "step": 1569 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008751332391374482, + "loss": 3.322, + "step": 1570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008749605347306467, + "loss": 3.3417, + "step": 1571 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008747877280383548, + "loss": 3.3849, + "step": 1572 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008746148191077123, + "loss": 3.2775, + "step": 1573 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008744418079858873, + "loss": 3.2642, + "step": 1574 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008742686947200751, + "loss": 3.4024, + "step": 1575 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008740954793574995, + "loss": 3.2527, + "step": 1576 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008739221619454121, + "loss": 3.3368, + "step": 1577 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008737487425310917, + "loss": 3.2765, + "step": 1578 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008735752211618456, + "loss": 3.3055, + "step": 1579 + }, + { + "epoch": 0.25, + "learning_rate": 0.000873401597885009, + "loss": 3.3211, + "step": 1580 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008732278727479441, + "loss": 3.3662, + "step": 1581 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008730540457980417, + "loss": 3.2515, + "step": 1582 + }, + { + "epoch": 0.26, + "learning_rate": 0.00087288011708272, + "loss": 3.4034, + "step": 1583 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008727060866494249, + "loss": 3.3333, + "step": 1584 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008725319545456303, + "loss": 3.4605, + "step": 1585 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008723577208188377, + "loss": 3.3403, + "step": 1586 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008721833855165761, + "loss": 3.2783, + "step": 1587 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008720089486864029, + "loss": 3.1317, + "step": 1588 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008718344103759021, + "loss": 3.2456, + "step": 1589 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008716597706326867, + "loss": 3.3759, + "step": 1590 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008714850295043961, + "loss": 3.2944, + "step": 1591 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008713101870386981, + "loss": 3.2833, + "step": 1592 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008711352432832881, + "loss": 3.2855, + "step": 1593 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008709601982858891, + "loss": 3.3516, + "step": 1594 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008707850520942512, + "loss": 3.3091, + "step": 1595 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008706098047561529, + "loss": 3.4651, + "step": 1596 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008704344563193998, + "loss": 3.3412, + "step": 1597 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008702590068318252, + "loss": 3.3547, + "step": 1598 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008700834563412902, + "loss": 3.2924, + "step": 1599 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008699078048956828, + "loss": 3.363, + "step": 1600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008697320525429194, + "loss": 3.4448, + "step": 1601 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008695561993309431, + "loss": 3.5214, + "step": 1602 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008693802453077251, + "loss": 3.4056, + "step": 1603 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008692041905212637, + "loss": 3.5342, + "step": 1604 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008690280350195853, + "loss": 3.4405, + "step": 1605 + }, + { + "epoch": 0.26, + "learning_rate": 0.000868851778850743, + "loss": 3.3173, + "step": 1606 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008686754220628179, + "loss": 3.3101, + "step": 1607 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008684989647039183, + "loss": 3.2712, + "step": 1608 + }, + { + "epoch": 0.26, + "learning_rate": 0.00086832240682218, + "loss": 3.3881, + "step": 1609 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008681457484657662, + "loss": 3.4118, + "step": 1610 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008679689896828677, + "loss": 3.3789, + "step": 1611 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008677921305217022, + "loss": 3.252, + "step": 1612 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008676151710305156, + "loss": 3.1559, + "step": 1613 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008674381112575802, + "loss": 3.4185, + "step": 1614 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008672609512511964, + "loss": 3.265, + "step": 1615 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008670836910596917, + "loss": 3.2801, + "step": 1616 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008669063307314207, + "loss": 3.2141, + "step": 1617 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008667288703147658, + "loss": 3.3782, + "step": 1618 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008665513098581363, + "loss": 3.307, + "step": 1619 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008663736494099688, + "loss": 3.2769, + "step": 1620 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008661958890187276, + "loss": 3.3125, + "step": 1621 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008660180287329036, + "loss": 3.2936, + "step": 1622 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008658400686010155, + "loss": 3.3425, + "step": 1623 + }, + { + "epoch": 0.26, + "learning_rate": 0.000865662008671609, + "loss": 3.2615, + "step": 1624 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008654838489932573, + "loss": 3.3733, + "step": 1625 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008653055896145602, + "loss": 3.4423, + "step": 1626 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008651272305841454, + "loss": 3.3877, + "step": 1627 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008649487719506671, + "loss": 3.3195, + "step": 1628 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008647702137628074, + "loss": 3.322, + "step": 1629 + }, + { + "epoch": 0.26, + "learning_rate": 0.000864591556069275, + "loss": 3.2975, + "step": 1630 + }, + { + "epoch": 0.26, + "learning_rate": 0.000864412798918806, + "loss": 3.2645, + "step": 1631 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008642339423601636, + "loss": 3.4851, + "step": 1632 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008640549864421381, + "loss": 3.164, + "step": 1633 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008638759312135467, + "loss": 3.114, + "step": 1634 + }, + { + "epoch": 0.26, + "learning_rate": 0.000863696776723234, + "loss": 3.3485, + "step": 1635 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008635175230200715, + "loss": 3.4216, + "step": 1636 + }, + { + "epoch": 0.26, + "learning_rate": 0.000863338170152958, + "loss": 3.2377, + "step": 1637 + }, + { + "epoch": 0.26, + "learning_rate": 0.000863158718170819, + "loss": 3.4166, + "step": 1638 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008629791671226072, + "loss": 3.1966, + "step": 1639 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008627995170573025, + "loss": 3.4511, + "step": 1640 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008626197680239115, + "loss": 3.3347, + "step": 1641 + }, + { + "epoch": 0.26, + "learning_rate": 0.000862439920071468, + "loss": 3.1548, + "step": 1642 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008622599732490326, + "loss": 3.2822, + "step": 1643 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008620799276056932, + "loss": 3.4061, + "step": 1644 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008618997831905644, + "loss": 3.2086, + "step": 1645 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008617195400527877, + "loss": 3.2865, + "step": 1646 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008615391982415316, + "loss": 3.2712, + "step": 1647 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008613587578059916, + "loss": 3.3824, + "step": 1648 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008611782187953903, + "loss": 3.3385, + "step": 1649 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008609975812589766, + "loss": 3.3252, + "step": 1650 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008608168452460265, + "loss": 3.312, + "step": 1651 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008606360108058435, + "loss": 3.2779, + "step": 1652 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008604550779877571, + "loss": 3.3992, + "step": 1653 + }, + { + "epoch": 0.27, + "learning_rate": 0.000860274046841124, + "loss": 3.3449, + "step": 1654 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008600929174153275, + "loss": 3.2202, + "step": 1655 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008599116897597784, + "loss": 3.3143, + "step": 1656 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008597303639239133, + "loss": 3.2843, + "step": 1657 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008595489399571964, + "loss": 3.4379, + "step": 1658 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008593674179091182, + "loss": 3.5006, + "step": 1659 + }, + { + "epoch": 0.27, + "learning_rate": 0.000859185797829196, + "loss": 3.2389, + "step": 1660 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008590040797669741, + "loss": 3.3283, + "step": 1661 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008588222637720233, + "loss": 3.2288, + "step": 1662 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008586403498939414, + "loss": 3.2369, + "step": 1663 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008584583381823523, + "loss": 3.3218, + "step": 1664 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008582762286869073, + "loss": 3.2937, + "step": 1665 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008580940214572841, + "loss": 3.2713, + "step": 1666 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008579117165431867, + "loss": 3.3341, + "step": 1667 + }, + { + "epoch": 0.27, + "learning_rate": 0.000857729313994346, + "loss": 3.3384, + "step": 1668 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008575468138605203, + "loss": 3.3807, + "step": 1669 + }, + { + "epoch": 0.27, + "learning_rate": 0.000857364216191493, + "loss": 3.4699, + "step": 1670 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008571815210370753, + "loss": 3.3673, + "step": 1671 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008569987284471046, + "loss": 3.3418, + "step": 1672 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008568158384714448, + "loss": 3.2977, + "step": 1673 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008566328511599864, + "loss": 3.3425, + "step": 1674 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008564497665626468, + "loss": 3.293, + "step": 1675 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008562665847293695, + "loss": 3.2654, + "step": 1676 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008560833057101246, + "loss": 3.3123, + "step": 1677 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008558999295549088, + "loss": 3.3273, + "step": 1678 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008557164563137454, + "loss": 3.3569, + "step": 1679 + }, + { + "epoch": 0.27, + "learning_rate": 0.000855532886036684, + "loss": 3.2548, + "step": 1680 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008553492187738005, + "loss": 3.1176, + "step": 1681 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008551654545751981, + "loss": 3.3414, + "step": 1682 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008549815934910052, + "loss": 3.2832, + "step": 1683 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008547976355713776, + "loss": 3.3259, + "step": 1684 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008546135808664972, + "loss": 3.3804, + "step": 1685 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008544294294265722, + "loss": 3.4757, + "step": 1686 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008542451813018372, + "loss": 3.2337, + "step": 1687 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008540608365425532, + "loss": 3.3022, + "step": 1688 + }, + { + "epoch": 0.27, + "learning_rate": 0.000853876395199008, + "loss": 3.3891, + "step": 1689 + }, + { + "epoch": 0.27, + "learning_rate": 0.000853691857321515, + "loss": 3.4109, + "step": 1690 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008535072229604144, + "loss": 3.2647, + "step": 1691 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008533224921660725, + "loss": 3.3116, + "step": 1692 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008531376649888822, + "loss": 3.3762, + "step": 1693 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008529527414792623, + "loss": 3.2327, + "step": 1694 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008527677216876584, + "loss": 3.2331, + "step": 1695 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008525826056645418, + "loss": 3.405, + "step": 1696 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008523973934604104, + "loss": 3.2509, + "step": 1697 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008522120851257881, + "loss": 3.354, + "step": 1698 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008520266807112252, + "loss": 3.3016, + "step": 1699 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008518411802672984, + "loss": 3.3542, + "step": 1700 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008516555838446101, + "loss": 3.3469, + "step": 1701 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008514698914937894, + "loss": 3.3224, + "step": 1702 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008512841032654911, + "loss": 3.3695, + "step": 1703 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008510982192103965, + "loss": 3.353, + "step": 1704 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008509122393792129, + "loss": 3.3689, + "step": 1705 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008507261638226735, + "loss": 3.3009, + "step": 1706 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008505399925915382, + "loss": 3.2994, + "step": 1707 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008503537257365925, + "loss": 3.2778, + "step": 1708 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008501673633086482, + "loss": 3.4669, + "step": 1709 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849980905358543, + "loss": 3.4403, + "step": 1710 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849794351937141, + "loss": 3.3289, + "step": 1711 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008496077030953318, + "loss": 3.3796, + "step": 1712 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008494209588840313, + "loss": 3.3422, + "step": 1713 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849234119354182, + "loss": 3.337, + "step": 1714 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008490471845567513, + "loss": 3.2886, + "step": 1715 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008488601545427333, + "loss": 3.2523, + "step": 1716 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008486730293631482, + "loss": 3.2376, + "step": 1717 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008484858090690415, + "loss": 3.3598, + "step": 1718 + }, + { + "epoch": 0.28, + "learning_rate": 0.000848298493711485, + "loss": 3.3352, + "step": 1719 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008481110833415769, + "loss": 3.316, + "step": 1720 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008479235780104405, + "loss": 3.3215, + "step": 1721 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008477359777692255, + "loss": 3.2302, + "step": 1722 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008475482826691075, + "loss": 3.299, + "step": 1723 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008473604927612874, + "loss": 3.1687, + "step": 1724 + }, + { + "epoch": 0.28, + "learning_rate": 0.000847172608096993, + "loss": 3.2639, + "step": 1725 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846984628727477, + "loss": 3.4036, + "step": 1726 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008467965547040184, + "loss": 3.2366, + "step": 1727 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008466083860779219, + "loss": 3.2039, + "step": 1728 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008464201229005182, + "loss": 3.3338, + "step": 1729 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008462317652231631, + "loss": 3.3872, + "step": 1730 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008460433130972392, + "loss": 3.2931, + "step": 1731 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008458547665741542, + "loss": 3.4087, + "step": 1732 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008456661257053418, + "loss": 3.3458, + "step": 1733 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845477390542261, + "loss": 3.3708, + "step": 1734 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008452885611363974, + "loss": 3.3202, + "step": 1735 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008450996375392613, + "loss": 3.3844, + "step": 1736 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008449106198023892, + "loss": 3.1221, + "step": 1737 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008447215079773438, + "loss": 3.1685, + "step": 1738 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008445323021157123, + "loss": 3.2708, + "step": 1739 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008443430022691085, + "loss": 3.3997, + "step": 1740 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008441536084891714, + "loss": 3.3485, + "step": 1741 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008439641208275657, + "loss": 3.5026, + "step": 1742 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008437745393359817, + "loss": 3.4389, + "step": 1743 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008435848640661356, + "loss": 3.2722, + "step": 1744 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008433950950697686, + "loss": 3.2661, + "step": 1745 + }, + { + "epoch": 0.28, + "learning_rate": 0.000843205232398648, + "loss": 3.4215, + "step": 1746 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008430152761045664, + "loss": 3.345, + "step": 1747 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008428252262393419, + "loss": 3.3567, + "step": 1748 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008426350828548182, + "loss": 3.3495, + "step": 1749 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008424448460028647, + "loss": 3.244, + "step": 1750 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008422545157353759, + "loss": 3.373, + "step": 1751 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008420640921042721, + "loss": 3.2599, + "step": 1752 + }, + { + "epoch": 0.28, + "learning_rate": 0.000841873575161499, + "loss": 3.3489, + "step": 1753 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008416829649590277, + "loss": 3.5301, + "step": 1754 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008414922615488546, + "loss": 3.2543, + "step": 1755 + }, + { + "epoch": 0.28, + "learning_rate": 0.000841301464983002, + "loss": 3.4799, + "step": 1756 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008411105753135169, + "loss": 3.2386, + "step": 1757 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008409195925924725, + "loss": 3.3996, + "step": 1758 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008407285168719667, + "loss": 3.2367, + "step": 1759 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008405373482041232, + "loss": 3.2706, + "step": 1760 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008403460866410908, + "loss": 3.2546, + "step": 1761 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008401547322350438, + "loss": 3.3229, + "step": 1762 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008399632850381817, + "loss": 3.3638, + "step": 1763 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008397717451027296, + "loss": 3.3542, + "step": 1764 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008395801124809374, + "loss": 3.2764, + "step": 1765 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008393883872250807, + "loss": 3.3415, + "step": 1766 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008391965693874602, + "loss": 3.2087, + "step": 1767 + }, + { + "epoch": 0.29, + "learning_rate": 0.000839004659020402, + "loss": 3.3824, + "step": 1768 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008388126561762572, + "loss": 3.2754, + "step": 1769 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008386205609074024, + "loss": 3.3948, + "step": 1770 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008384283732662391, + "loss": 3.3137, + "step": 1771 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008382360933051943, + "loss": 3.2394, + "step": 1772 + }, + { + "epoch": 0.29, + "learning_rate": 0.00083804372107672, + "loss": 3.3858, + "step": 1773 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008378512566332934, + "loss": 3.2368, + "step": 1774 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008376587000274169, + "loss": 3.2317, + "step": 1775 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008374660513116181, + "loss": 3.37, + "step": 1776 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008372733105384496, + "loss": 3.3567, + "step": 1777 + }, + { + "epoch": 0.29, + "learning_rate": 0.000837080477760489, + "loss": 3.4351, + "step": 1778 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008368875530303394, + "loss": 3.2047, + "step": 1779 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008366945364006287, + "loss": 3.3192, + "step": 1780 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008365014279240096, + "loss": 3.1908, + "step": 1781 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008363082276531607, + "loss": 3.3268, + "step": 1782 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008361149356407845, + "loss": 3.3165, + "step": 1783 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008359215519396096, + "loss": 3.337, + "step": 1784 + }, + { + "epoch": 0.29, + "learning_rate": 0.000835728076602389, + "loss": 3.2683, + "step": 1785 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008355345096819008, + "loss": 3.1733, + "step": 1786 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008353408512309482, + "loss": 3.3719, + "step": 1787 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008351471013023591, + "loss": 3.4096, + "step": 1788 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008349532599489869, + "loss": 3.3843, + "step": 1789 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008347593272237092, + "loss": 3.3216, + "step": 1790 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008345653031794292, + "loss": 3.4051, + "step": 1791 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008343711878690746, + "loss": 3.4066, + "step": 1792 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008341769813455979, + "loss": 3.3974, + "step": 1793 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008339826836619771, + "loss": 3.1576, + "step": 1794 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008337882948712146, + "loss": 3.2871, + "step": 1795 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008335938150263373, + "loss": 3.3098, + "step": 1796 + }, + { + "epoch": 0.29, + "learning_rate": 0.000833399244180398, + "loss": 3.1953, + "step": 1797 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008332045823864733, + "loss": 3.2946, + "step": 1798 + }, + { + "epoch": 0.29, + "learning_rate": 0.000833009829697665, + "loss": 3.3054, + "step": 1799 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008328149861670997, + "loss": 3.3388, + "step": 1800 + }, + { + "epoch": 0.29, + "learning_rate": 0.000832620051847929, + "loss": 3.4077, + "step": 1801 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008324250267933285, + "loss": 3.3008, + "step": 1802 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008322299110564997, + "loss": 3.2172, + "step": 1803 + }, + { + "epoch": 0.29, + "learning_rate": 0.000832034704690668, + "loss": 3.2018, + "step": 1804 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008318394077490835, + "loss": 3.4325, + "step": 1805 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008316440202850216, + "loss": 3.23, + "step": 1806 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008314485423517819, + "loss": 3.4174, + "step": 1807 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008312529740026888, + "loss": 3.4263, + "step": 1808 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008310573152910916, + "loss": 3.4368, + "step": 1809 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008308615662703638, + "loss": 3.2422, + "step": 1810 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008306657269939037, + "loss": 3.243, + "step": 1811 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008304697975151347, + "loss": 3.4144, + "step": 1812 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008302737778875041, + "loss": 3.2929, + "step": 1813 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008300776681644841, + "loss": 3.2921, + "step": 1814 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008298814683995715, + "loss": 3.2962, + "step": 1815 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008296851786462877, + "loss": 3.3011, + "step": 1816 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008294887989581786, + "loss": 3.3418, + "step": 1817 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008292923293888145, + "loss": 3.3993, + "step": 1818 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008290957699917905, + "loss": 3.3173, + "step": 1819 + }, + { + "epoch": 0.29, + "learning_rate": 0.000828899120820726, + "loss": 3.3227, + "step": 1820 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008287023819292648, + "loss": 3.5335, + "step": 1821 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008285055533710752, + "loss": 3.2822, + "step": 1822 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008283086351998505, + "loss": 3.3104, + "step": 1823 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008281116274693078, + "loss": 3.3159, + "step": 1824 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008279145302331888, + "loss": 3.2211, + "step": 1825 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008277173435452597, + "loss": 3.2887, + "step": 1826 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008275200674593108, + "loss": 3.2633, + "step": 1827 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008273227020291574, + "loss": 3.3126, + "step": 1828 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008271252473086388, + "loss": 3.2822, + "step": 1829 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008269277033516184, + "loss": 3.2569, + "step": 1830 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008267300702119845, + "loss": 3.2969, + "step": 1831 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008265323479436493, + "loss": 3.4729, + "step": 1832 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008263345366005494, + "loss": 3.4062, + "step": 1833 + }, + { + "epoch": 0.3, + "learning_rate": 0.000826136636236646, + "loss": 3.3166, + "step": 1834 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008259386469059242, + "loss": 3.3158, + "step": 1835 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008257405686623936, + "loss": 3.2941, + "step": 1836 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008255424015600877, + "loss": 3.4495, + "step": 1837 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008253441456530647, + "loss": 3.2866, + "step": 1838 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008251458009954069, + "loss": 3.4028, + "step": 1839 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008249473676412207, + "loss": 3.3452, + "step": 1840 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008247488456446366, + "loss": 3.3798, + "step": 1841 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008245502350598096, + "loss": 3.4339, + "step": 1842 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008243515359409185, + "loss": 3.2506, + "step": 1843 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008241527483421665, + "loss": 3.3814, + "step": 1844 + }, + { + "epoch": 0.3, + "learning_rate": 0.000823953872317781, + "loss": 3.3459, + "step": 1845 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008237549079220135, + "loss": 3.4041, + "step": 1846 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008235558552091391, + "loss": 3.358, + "step": 1847 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008233567142334576, + "loss": 3.382, + "step": 1848 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008231574850492926, + "loss": 3.2565, + "step": 1849 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008229581677109921, + "loss": 3.2941, + "step": 1850 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008227587622729275, + "loss": 3.4547, + "step": 1851 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008225592687894949, + "loss": 3.4747, + "step": 1852 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008223596873151142, + "loss": 3.2097, + "step": 1853 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008221600179042288, + "loss": 3.1926, + "step": 1854 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008219602606113068, + "loss": 3.3898, + "step": 1855 + }, + { + "epoch": 0.3, + "learning_rate": 0.00082176041549084, + "loss": 3.2946, + "step": 1856 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008215604825973442, + "loss": 3.2356, + "step": 1857 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008213604619853591, + "loss": 3.2952, + "step": 1858 + }, + { + "epoch": 0.3, + "learning_rate": 0.000821160353709448, + "loss": 3.2517, + "step": 1859 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008209601578241989, + "loss": 3.4935, + "step": 1860 + }, + { + "epoch": 0.3, + "learning_rate": 0.000820759874384223, + "loss": 3.296, + "step": 1861 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008205595034441556, + "loss": 3.2877, + "step": 1862 + }, + { + "epoch": 0.3, + "learning_rate": 0.000820359045058656, + "loss": 3.3523, + "step": 1863 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008201584992824072, + "loss": 3.242, + "step": 1864 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008199578661701161, + "loss": 3.3313, + "step": 1865 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008197571457765134, + "loss": 3.1903, + "step": 1866 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008195563381563535, + "loss": 3.276, + "step": 1867 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008193554433644149, + "loss": 3.3687, + "step": 1868 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008191544614554996, + "loss": 3.3583, + "step": 1869 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008189533924844335, + "loss": 3.2527, + "step": 1870 + }, + { + "epoch": 0.3, + "learning_rate": 0.000818752236506066, + "loss": 3.1176, + "step": 1871 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008185509935752708, + "loss": 3.3722, + "step": 1872 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008183496637469449, + "loss": 3.3747, + "step": 1873 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008181482470760086, + "loss": 3.3236, + "step": 1874 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008179467436174071, + "loss": 3.3958, + "step": 1875 + }, + { + "epoch": 0.3, + "learning_rate": 0.000817745153426108, + "loss": 3.4579, + "step": 1876 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008175434765571031, + "loss": 3.2303, + "step": 1877 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008173417130654081, + "loss": 3.3073, + "step": 1878 + }, + { + "epoch": 0.3, + "learning_rate": 0.000817139863006062, + "loss": 3.2312, + "step": 1879 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008169379264341273, + "loss": 3.4417, + "step": 1880 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008167359034046906, + "loss": 3.3127, + "step": 1881 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008165337939728615, + "loss": 3.282, + "step": 1882 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008163315981937734, + "loss": 3.3903, + "step": 1883 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008161293161225834, + "loss": 3.299, + "step": 1884 + }, + { + "epoch": 0.3, + "learning_rate": 0.000815926947814472, + "loss": 3.3398, + "step": 1885 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008157244933246433, + "loss": 3.2728, + "step": 1886 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008155219527083248, + "loss": 3.2947, + "step": 1887 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008153193260207677, + "loss": 3.4153, + "step": 1888 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008151166133172464, + "loss": 3.2443, + "step": 1889 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008149138146530588, + "loss": 3.134, + "step": 1890 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008147109300835268, + "loss": 3.3258, + "step": 1891 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008145079596639947, + "loss": 3.373, + "step": 1892 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008143049034498311, + "loss": 3.5346, + "step": 1893 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008141017614964278, + "loss": 3.4896, + "step": 1894 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008138985338591998, + "loss": 3.3701, + "step": 1895 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008136952205935854, + "loss": 3.1809, + "step": 1896 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008134918217550467, + "loss": 3.1437, + "step": 1897 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008132883373990688, + "loss": 3.2098, + "step": 1898 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008130847675811599, + "loss": 3.2158, + "step": 1899 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008128811123568522, + "loss": 3.3247, + "step": 1900 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008126773717817007, + "loss": 3.3004, + "step": 1901 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008124735459112837, + "loss": 3.4167, + "step": 1902 + }, + { + "epoch": 0.31, + "learning_rate": 0.000812269634801203, + "loss": 3.3174, + "step": 1903 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008120656385070837, + "loss": 3.2891, + "step": 1904 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008118615570845734, + "loss": 3.3537, + "step": 1905 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008116573905893439, + "loss": 3.2605, + "step": 1906 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008114531390770896, + "loss": 3.2651, + "step": 1907 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008112488026035284, + "loss": 3.2966, + "step": 1908 + }, + { + "epoch": 0.31, + "learning_rate": 0.000811044381224401, + "loss": 3.3765, + "step": 1909 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810839874995472, + "loss": 3.2007, + "step": 1910 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008106352839725282, + "loss": 3.3245, + "step": 1911 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008104306082113801, + "loss": 3.3985, + "step": 1912 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008102258477678613, + "loss": 3.2951, + "step": 1913 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008100210026978283, + "loss": 3.3456, + "step": 1914 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008098160730571609, + "loss": 3.0625, + "step": 1915 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008096110589017617, + "loss": 3.3031, + "step": 1916 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008094059602875567, + "loss": 3.3961, + "step": 1917 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008092007772704948, + "loss": 3.2407, + "step": 1918 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008089955099065476, + "loss": 3.2703, + "step": 1919 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008087901582517101, + "loss": 3.2769, + "step": 1920 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008085847223620002, + "loss": 3.1523, + "step": 1921 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008083792022934589, + "loss": 3.3082, + "step": 1922 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008081735981021499, + "loss": 3.2886, + "step": 1923 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008079679098441599, + "loss": 3.1875, + "step": 1924 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008077621375755987, + "loss": 3.2664, + "step": 1925 + }, + { + "epoch": 0.31, + "learning_rate": 0.000807556281352599, + "loss": 3.1982, + "step": 1926 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008073503412313159, + "loss": 3.2889, + "step": 1927 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008071443172679285, + "loss": 3.3018, + "step": 1928 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008069382095186375, + "loss": 3.2903, + "step": 1929 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008067320180396672, + "loss": 3.3355, + "step": 1930 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008065257428872647, + "loss": 3.2915, + "step": 1931 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008063193841176999, + "loss": 3.4211, + "step": 1932 + }, + { + "epoch": 0.31, + "learning_rate": 0.000806112941787265, + "loss": 3.3267, + "step": 1933 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008059064159522757, + "loss": 3.261, + "step": 1934 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008056998066690702, + "loss": 3.2882, + "step": 1935 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008054931139940092, + "loss": 3.3267, + "step": 1936 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008052863379834767, + "loss": 3.2724, + "step": 1937 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008050794786938791, + "loss": 3.354, + "step": 1938 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008048725361816454, + "loss": 3.1559, + "step": 1939 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008046655105032273, + "loss": 3.379, + "step": 1940 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008044584017150997, + "loss": 3.1259, + "step": 1941 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008042512098737597, + "loss": 3.333, + "step": 1942 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008040439350357272, + "loss": 3.3844, + "step": 1943 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008038365772575444, + "loss": 3.2899, + "step": 1944 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008036291365957768, + "loss": 3.2145, + "step": 1945 + }, + { + "epoch": 0.31, + "learning_rate": 0.000803421613107012, + "loss": 3.2227, + "step": 1946 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008032140068478603, + "loss": 3.1662, + "step": 1947 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008030063178749548, + "loss": 3.3444, + "step": 1948 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008027985462449505, + "loss": 3.1388, + "step": 1949 + }, + { + "epoch": 0.31, + "learning_rate": 0.000802590692014526, + "loss": 3.2995, + "step": 1950 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008023827552403815, + "loss": 3.3751, + "step": 1951 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008021747359792403, + "loss": 3.3041, + "step": 1952 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008019666342878479, + "loss": 3.3719, + "step": 1953 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008017584502229723, + "loss": 3.2941, + "step": 1954 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008015501838414038, + "loss": 3.2941, + "step": 1955 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008013418351999561, + "loss": 3.3175, + "step": 1956 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008011334043554639, + "loss": 3.2465, + "step": 1957 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008009248913647855, + "loss": 3.2828, + "step": 1958 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008007162962848009, + "loss": 3.2684, + "step": 1959 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008005076191724128, + "loss": 3.4251, + "step": 1960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008002988600845464, + "loss": 3.3131, + "step": 1961 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008000900190781489, + "loss": 3.2152, + "step": 1962 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007998810962101902, + "loss": 3.3632, + "step": 1963 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007996720915376623, + "loss": 3.3837, + "step": 1964 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007994630051175795, + "loss": 3.2259, + "step": 1965 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007992538370069787, + "loss": 3.2372, + "step": 1966 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007990445872629188, + "loss": 3.2951, + "step": 1967 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007988352559424808, + "loss": 3.3707, + "step": 1968 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007986258431027684, + "loss": 3.302, + "step": 1969 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007984163488009076, + "loss": 3.2188, + "step": 1970 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007982067730940457, + "loss": 3.1943, + "step": 1971 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007979971160393534, + "loss": 3.2798, + "step": 1972 + }, + { + "epoch": 0.32, + "learning_rate": 0.000797787377694023, + "loss": 3.3238, + "step": 1973 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007975775581152687, + "loss": 3.4319, + "step": 1974 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007973676573603274, + "loss": 3.3202, + "step": 1975 + }, + { + "epoch": 0.32, + "learning_rate": 0.000797157675486458, + "loss": 3.3417, + "step": 1976 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007969476125509414, + "loss": 3.4365, + "step": 1977 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007967374686110807, + "loss": 3.3685, + "step": 1978 + }, + { + "epoch": 0.32, + "learning_rate": 0.000796527243724201, + "loss": 3.2208, + "step": 1979 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007963169379476495, + "loss": 3.1208, + "step": 1980 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007961065513387956, + "loss": 3.2225, + "step": 1981 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007958960839550307, + "loss": 3.2271, + "step": 1982 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007956855358537682, + "loss": 3.2955, + "step": 1983 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007954749070924434, + "loss": 3.2951, + "step": 1984 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007952641977285137, + "loss": 3.2528, + "step": 1985 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007950534078194589, + "loss": 3.3092, + "step": 1986 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007948425374227799, + "loss": 3.1577, + "step": 1987 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007946315865960004, + "loss": 3.4106, + "step": 1988 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007944205553966653, + "loss": 3.1926, + "step": 1989 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007942094438823421, + "loss": 3.2895, + "step": 1990 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007939982521106198, + "loss": 3.2897, + "step": 1991 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007937869801391095, + "loss": 3.3033, + "step": 1992 + }, + { + "epoch": 0.32, + "learning_rate": 0.000793575628025444, + "loss": 3.244, + "step": 1993 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007933641958272782, + "loss": 3.2956, + "step": 1994 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007931526836022884, + "loss": 3.3539, + "step": 1995 + }, + { + "epoch": 0.32, + "learning_rate": 0.000792941091408173, + "loss": 3.2385, + "step": 1996 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007927294193026529, + "loss": 3.2053, + "step": 1997 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007925176673434693, + "loss": 3.3122, + "step": 1998 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007923058355883864, + "loss": 3.3112, + "step": 1999 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007920939240951899, + "loss": 3.3024, + "step": 2000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007918819329216871, + "loss": 3.2744, + "step": 2001 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007916698621257068, + "loss": 3.4105, + "step": 2002 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007914577117651001, + "loss": 3.3182, + "step": 2003 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007912454818977393, + "loss": 3.424, + "step": 2004 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007910331725815185, + "loss": 3.1176, + "step": 2005 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007908207838743541, + "loss": 3.2679, + "step": 2006 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007906083158341831, + "loss": 3.3404, + "step": 2007 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007903957685189649, + "loss": 3.2217, + "step": 2008 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007901831419866799, + "loss": 3.354, + "step": 2009 + }, + { + "epoch": 0.32, + "learning_rate": 0.000789970436295331, + "loss": 3.1425, + "step": 2010 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007897576515029421, + "loss": 3.3965, + "step": 2011 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007895447876675583, + "loss": 3.2766, + "step": 2012 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007893318448472474, + "loss": 3.4282, + "step": 2013 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007891188231000977, + "loss": 3.2903, + "step": 2014 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007889057224842193, + "loss": 3.3053, + "step": 2015 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007886925430577443, + "loss": 3.4105, + "step": 2016 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007884792848788256, + "loss": 3.2865, + "step": 2017 + }, + { + "epoch": 0.33, + "learning_rate": 0.000788265948005638, + "loss": 3.3477, + "step": 2018 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007880525324963776, + "loss": 3.1795, + "step": 2019 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007878390384092622, + "loss": 3.2196, + "step": 2020 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007876254658025307, + "loss": 3.2707, + "step": 2021 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007874118147344436, + "loss": 3.2618, + "step": 2022 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007871980852632829, + "loss": 3.3785, + "step": 2023 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007869842774473517, + "loss": 3.3158, + "step": 2024 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007867703913449747, + "loss": 3.3477, + "step": 2025 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007865564270144978, + "loss": 3.04, + "step": 2026 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007863423845142886, + "loss": 3.3698, + "step": 2027 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007861282639027354, + "loss": 3.4027, + "step": 2028 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007859140652382486, + "loss": 3.2539, + "step": 2029 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007856997885792589, + "loss": 3.2992, + "step": 2030 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007854854339842195, + "loss": 3.2591, + "step": 2031 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007852710015116036, + "loss": 3.4617, + "step": 2032 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007850564912199066, + "loss": 3.2176, + "step": 2033 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007848419031676445, + "loss": 3.2108, + "step": 2034 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007846272374133549, + "loss": 3.2394, + "step": 2035 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007844124940155967, + "loss": 3.2863, + "step": 2036 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007841976730329494, + "loss": 3.533, + "step": 2037 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007839827745240143, + "loss": 3.3082, + "step": 2038 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007837677985474132, + "loss": 3.4071, + "step": 2039 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007835527451617899, + "loss": 3.2687, + "step": 2040 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007833376144258085, + "loss": 3.2481, + "step": 2041 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007831224063981543, + "loss": 3.1558, + "step": 2042 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007829071211375345, + "loss": 3.3342, + "step": 2043 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007826917587026763, + "loss": 3.3242, + "step": 2044 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007824763191523285, + "loss": 3.1648, + "step": 2045 + }, + { + "epoch": 0.33, + "learning_rate": 0.000782260802545261, + "loss": 3.3914, + "step": 2046 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007820452089402645, + "loss": 3.2113, + "step": 2047 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007818295383961507, + "loss": 3.2991, + "step": 2048 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007816137909717526, + "loss": 3.3813, + "step": 2049 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007813979667259238, + "loss": 3.4531, + "step": 2050 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007811820657175389, + "loss": 3.1524, + "step": 2051 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007809660880054936, + "loss": 3.291, + "step": 2052 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007807500336487046, + "loss": 3.314, + "step": 2053 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007805339027061093, + "loss": 3.3191, + "step": 2054 + }, + { + "epoch": 0.33, + "learning_rate": 0.000780317695236666, + "loss": 3.2411, + "step": 2055 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007801014112993539, + "loss": 3.3343, + "step": 2056 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007798850509531732, + "loss": 3.3003, + "step": 2057 + }, + { + "epoch": 0.33, + "learning_rate": 0.000779668614257145, + "loss": 3.3541, + "step": 2058 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007794521012703106, + "loss": 3.1875, + "step": 2059 + }, + { + "epoch": 0.33, + "learning_rate": 0.000779235512051733, + "loss": 3.3534, + "step": 2060 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007790188466604955, + "loss": 3.3571, + "step": 2061 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007788021051557022, + "loss": 3.3341, + "step": 2062 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007785852875964778, + "loss": 3.1624, + "step": 2063 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007783683940419684, + "loss": 3.3886, + "step": 2064 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007781514245513401, + "loss": 3.2168, + "step": 2065 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007779343791837803, + "loss": 3.2119, + "step": 2066 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007777172579984963, + "loss": 3.2798, + "step": 2067 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007775000610547169, + "loss": 3.3222, + "step": 2068 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007772827884116915, + "loss": 3.3426, + "step": 2069 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007770654401286896, + "loss": 3.177, + "step": 2070 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007768480162650016, + "loss": 3.2953, + "step": 2071 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007766305168799385, + "loss": 3.2255, + "step": 2072 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007764129420328326, + "loss": 3.2328, + "step": 2073 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007761952917830353, + "loss": 3.3376, + "step": 2074 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007759775661899198, + "loss": 3.3519, + "step": 2075 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007757597653128796, + "loss": 3.3258, + "step": 2076 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007755418892113282, + "loss": 3.2734, + "step": 2077 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007753239379447005, + "loss": 3.266, + "step": 2078 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007751059115724511, + "loss": 3.3698, + "step": 2079 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007748878101540556, + "loss": 3.4313, + "step": 2080 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007746696337490098, + "loss": 3.2772, + "step": 2081 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007744513824168301, + "loss": 3.2807, + "step": 2082 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007742330562170532, + "loss": 3.3006, + "step": 2083 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007740146552092364, + "loss": 3.2362, + "step": 2084 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007737961794529573, + "loss": 3.4251, + "step": 2085 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007735776290078138, + "loss": 3.3316, + "step": 2086 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007733590039334242, + "loss": 3.3105, + "step": 2087 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007731403042894276, + "loss": 3.2811, + "step": 2088 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007729215301354828, + "loss": 3.3068, + "step": 2089 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007727026815312694, + "loss": 3.4135, + "step": 2090 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007724837585364868, + "loss": 3.2548, + "step": 2091 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007722647612108554, + "loss": 3.4309, + "step": 2092 + }, + { + "epoch": 0.34, + "learning_rate": 0.000772045689614115, + "loss": 3.1607, + "step": 2093 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007718265438060266, + "loss": 3.2631, + "step": 2094 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007716073238463709, + "loss": 3.1701, + "step": 2095 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007713880297949488, + "loss": 3.3182, + "step": 2096 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007711686617115814, + "loss": 3.4139, + "step": 2097 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007709492196561103, + "loss": 3.2526, + "step": 2098 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007707297036883972, + "loss": 3.4242, + "step": 2099 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007705101138683237, + "loss": 3.2089, + "step": 2100 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007702904502557919, + "loss": 3.3798, + "step": 2101 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007700707129107234, + "loss": 3.3468, + "step": 2102 + }, + { + "epoch": 0.34, + "learning_rate": 0.000769850901893061, + "loss": 3.2908, + "step": 2103 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007696310172627664, + "loss": 3.352, + "step": 2104 + }, + { + "epoch": 0.34, + "learning_rate": 0.000769411059079822, + "loss": 3.1928, + "step": 2105 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007691910274042306, + "loss": 3.3302, + "step": 2106 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007689709222960144, + "loss": 3.2059, + "step": 2107 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007687507438152157, + "loss": 3.2943, + "step": 2108 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007685304920218972, + "loss": 3.2335, + "step": 2109 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007683101669761412, + "loss": 3.2784, + "step": 2110 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007680897687380504, + "loss": 3.4007, + "step": 2111 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007678692973677472, + "loss": 3.3147, + "step": 2112 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767648752925374, + "loss": 3.165, + "step": 2113 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767428135471093, + "loss": 3.4647, + "step": 2114 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007672074450650863, + "loss": 3.2766, + "step": 2115 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007669866817675564, + "loss": 3.5702, + "step": 2116 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007667658456387251, + "loss": 3.3298, + "step": 2117 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007665449367388346, + "loss": 3.3949, + "step": 2118 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007663239551281461, + "loss": 3.2642, + "step": 2119 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007661029008669417, + "loss": 3.3703, + "step": 2120 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007658817740155224, + "loss": 3.2931, + "step": 2121 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007656605746342096, + "loss": 3.2367, + "step": 2122 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007654393027833443, + "loss": 3.2409, + "step": 2123 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007652179585232871, + "loss": 3.3024, + "step": 2124 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007649965419144187, + "loss": 3.4153, + "step": 2125 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007647750530171393, + "loss": 3.3285, + "step": 2126 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007645534918918687, + "loss": 3.2449, + "step": 2127 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007643318585990465, + "loss": 3.3336, + "step": 2128 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007641101531991324, + "loss": 3.2487, + "step": 2129 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007638883757526052, + "loss": 3.2595, + "step": 2130 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007636665263199637, + "loss": 3.1952, + "step": 2131 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007634446049617261, + "loss": 3.3774, + "step": 2132 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007632226117384302, + "loss": 3.3321, + "step": 2133 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007630005467106338, + "loss": 3.3728, + "step": 2134 + }, + { + "epoch": 0.34, + "learning_rate": 0.000762778409938914, + "loss": 3.2154, + "step": 2135 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007625562014838673, + "loss": 3.1877, + "step": 2136 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007623339214061101, + "loss": 3.2775, + "step": 2137 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007621115697662782, + "loss": 3.299, + "step": 2138 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007618891466250267, + "loss": 3.2165, + "step": 2139 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007616666520430304, + "loss": 3.3403, + "step": 2140 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007614440860809841, + "loss": 3.3165, + "step": 2141 + }, + { + "epoch": 0.35, + "learning_rate": 0.000761221448799601, + "loss": 3.2329, + "step": 2142 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007609987402596146, + "loss": 3.3127, + "step": 2143 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007607759605217777, + "loss": 3.3359, + "step": 2144 + }, + { + "epoch": 0.35, + "learning_rate": 0.000760553109646862, + "loss": 3.368, + "step": 2145 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007603301876956592, + "loss": 3.3018, + "step": 2146 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007601071947289803, + "loss": 3.2157, + "step": 2147 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007598841308076553, + "loss": 3.2489, + "step": 2148 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007596609959925342, + "loss": 3.3598, + "step": 2149 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007594377903444856, + "loss": 3.2148, + "step": 2150 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007592145139243977, + "loss": 3.2342, + "step": 2151 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007589911667931785, + "loss": 3.4123, + "step": 2152 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007587677490117546, + "loss": 3.1564, + "step": 2153 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007585442606410721, + "loss": 3.2405, + "step": 2154 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007583207017420966, + "loss": 3.3394, + "step": 2155 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007580970723758123, + "loss": 3.1147, + "step": 2156 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007578733726032234, + "loss": 3.2013, + "step": 2157 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007576496024853532, + "loss": 3.3885, + "step": 2158 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007574257620832435, + "loss": 3.0919, + "step": 2159 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007572018514579557, + "loss": 3.2372, + "step": 2160 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007569778706705708, + "loss": 3.1643, + "step": 2161 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007567538197821882, + "loss": 3.3225, + "step": 2162 + }, + { + "epoch": 0.35, + "learning_rate": 0.000756529698853927, + "loss": 3.2682, + "step": 2163 + }, + { + "epoch": 0.35, + "learning_rate": 0.000756305507946925, + "loss": 3.3258, + "step": 2164 + }, + { + "epoch": 0.35, + "learning_rate": 0.000756081247122339, + "loss": 3.156, + "step": 2165 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007558569164413456, + "loss": 3.3805, + "step": 2166 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007556325159651399, + "loss": 3.2488, + "step": 2167 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007554080457549355, + "loss": 3.2599, + "step": 2168 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007551835058719663, + "loss": 3.3745, + "step": 2169 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007549588963774843, + "loss": 3.3421, + "step": 2170 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007547342173327606, + "loss": 3.2082, + "step": 2171 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007545094687990857, + "loss": 3.3218, + "step": 2172 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007542846508377687, + "loss": 3.3511, + "step": 2173 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007540597635101371, + "loss": 3.1408, + "step": 2174 + }, + { + "epoch": 0.35, + "learning_rate": 0.000753834806877539, + "loss": 3.1676, + "step": 2175 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007536097810013395, + "loss": 3.3126, + "step": 2176 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007533846859429234, + "loss": 3.0507, + "step": 2177 + }, + { + "epoch": 0.35, + "learning_rate": 0.000753159521763695, + "loss": 3.4192, + "step": 2178 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007529342885250763, + "loss": 3.2173, + "step": 2179 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007527089862885088, + "loss": 3.3216, + "step": 2180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007524836151154528, + "loss": 3.2036, + "step": 2181 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007522581750673871, + "loss": 3.1914, + "step": 2182 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007520326662058099, + "loss": 3.2593, + "step": 2183 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007518070885922374, + "loss": 3.2114, + "step": 2184 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007515814422882051, + "loss": 3.2457, + "step": 2185 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007513557273552669, + "loss": 3.0602, + "step": 2186 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007511299438549957, + "loss": 3.3307, + "step": 2187 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007509040918489829, + "loss": 3.3034, + "step": 2188 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007506781713988389, + "loss": 3.1123, + "step": 2189 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007504521825661923, + "loss": 3.0999, + "step": 2190 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007502261254126905, + "loss": 3.0838, + "step": 2191 + }, + { + "epoch": 0.35, + "learning_rate": 0.00075, + "loss": 3.3044, + "step": 2192 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007497738063898053, + "loss": 3.3323, + "step": 2193 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007495475446438098, + "loss": 3.3135, + "step": 2194 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007493212148237356, + "loss": 3.2394, + "step": 2195 + }, + { + "epoch": 0.35, + "learning_rate": 0.000749094816991323, + "loss": 3.3311, + "step": 2196 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007488683512083309, + "loss": 3.2943, + "step": 2197 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007486418175365376, + "loss": 3.2907, + "step": 2198 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007484152160377385, + "loss": 3.2355, + "step": 2199 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007481885467737486, + "loss": 3.337, + "step": 2200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007479618098064009, + "loss": 3.2714, + "step": 2201 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007477350051975468, + "loss": 3.3108, + "step": 2202 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007475081330090567, + "loss": 3.1877, + "step": 2203 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007472811933028191, + "loss": 3.2094, + "step": 2204 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007470541861407404, + "loss": 3.1647, + "step": 2205 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007468271115847463, + "loss": 3.2426, + "step": 2206 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007465999696967804, + "loss": 3.2321, + "step": 2207 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007463727605388044, + "loss": 3.2257, + "step": 2208 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007461454841727992, + "loss": 3.3371, + "step": 2209 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007459181406607633, + "loss": 3.2265, + "step": 2210 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007456907300647137, + "loss": 3.2353, + "step": 2211 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007454632524466858, + "loss": 3.2555, + "step": 2212 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007452357078687332, + "loss": 3.2839, + "step": 2213 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007450080963929277, + "loss": 3.1861, + "step": 2214 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007447804180813596, + "loss": 3.2644, + "step": 2215 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007445526729961372, + "loss": 3.3463, + "step": 2216 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007443248611993872, + "loss": 3.3303, + "step": 2217 + }, + { + "epoch": 0.36, + "learning_rate": 0.000744096982753254, + "loss": 3.3225, + "step": 2218 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007438690377199011, + "loss": 3.2817, + "step": 2219 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007436410261615092, + "loss": 3.2854, + "step": 2220 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007434129481402777, + "loss": 3.4136, + "step": 2221 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007431848037184242, + "loss": 3.2205, + "step": 2222 + }, + { + "epoch": 0.36, + "learning_rate": 0.000742956592958184, + "loss": 3.368, + "step": 2223 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007427283159218107, + "loss": 3.1307, + "step": 2224 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007424999726715763, + "loss": 3.377, + "step": 2225 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007422715632697702, + "loss": 3.3145, + "step": 2226 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007420430877787003, + "loss": 3.3853, + "step": 2227 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007418145462606925, + "loss": 3.1315, + "step": 2228 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007415859387780906, + "loss": 3.1234, + "step": 2229 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007413572653932566, + "loss": 3.4594, + "step": 2230 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007411285261685701, + "loss": 3.3256, + "step": 2231 + }, + { + "epoch": 0.36, + "learning_rate": 0.000740899721166429, + "loss": 3.301, + "step": 2232 + }, + { + "epoch": 0.36, + "learning_rate": 0.000740670850449249, + "loss": 3.3586, + "step": 2233 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007404419140794634, + "loss": 3.2743, + "step": 2234 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007402129121195245, + "loss": 3.221, + "step": 2235 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007399838446319011, + "loss": 3.1987, + "step": 2236 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007397547116790807, + "loss": 3.1278, + "step": 2237 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007395255133235686, + "loss": 3.3863, + "step": 2238 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007392962496278879, + "loss": 3.2034, + "step": 2239 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007390669206545791, + "loss": 3.2001, + "step": 2240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007388375264662011, + "loss": 3.3175, + "step": 2241 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007386080671253305, + "loss": 3.1897, + "step": 2242 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007383785426945611, + "loss": 3.0812, + "step": 2243 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007381489532365051, + "loss": 3.3124, + "step": 2244 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007379192988137924, + "loss": 3.0756, + "step": 2245 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007376895794890699, + "loss": 3.2923, + "step": 2246 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007374597953250034, + "loss": 3.3694, + "step": 2247 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007372299463842753, + "loss": 3.2766, + "step": 2248 + }, + { + "epoch": 0.36, + "learning_rate": 0.000737000032729586, + "loss": 3.2082, + "step": 2249 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007367700544236541, + "loss": 3.3897, + "step": 2250 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007365400115292151, + "loss": 3.151, + "step": 2251 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007363099041090223, + "loss": 3.2438, + "step": 2252 + }, + { + "epoch": 0.36, + "learning_rate": 0.000736079732225847, + "loss": 3.2622, + "step": 2253 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007358494959424775, + "loss": 3.3762, + "step": 2254 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007356191953217202, + "loss": 3.244, + "step": 2255 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007353888304263987, + "loss": 3.2934, + "step": 2256 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007351584013193543, + "loss": 3.2493, + "step": 2257 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007349279080634456, + "loss": 3.2075, + "step": 2258 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007346973507215491, + "loss": 3.1229, + "step": 2259 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007344667293565582, + "loss": 3.4161, + "step": 2260 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007342360440313845, + "loss": 3.4066, + "step": 2261 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007340052948089564, + "loss": 3.1915, + "step": 2262 + }, + { + "epoch": 0.36, + "learning_rate": 0.00073377448175222, + "loss": 3.3072, + "step": 2263 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007335436049241391, + "loss": 3.3976, + "step": 2264 + }, + { + "epoch": 0.37, + "learning_rate": 0.000733312664387694, + "loss": 3.2397, + "step": 2265 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007330816602058835, + "loss": 3.2002, + "step": 2266 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007328505924417231, + "loss": 3.176, + "step": 2267 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007326194611582457, + "loss": 3.3226, + "step": 2268 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007323882664185016, + "loss": 3.2193, + "step": 2269 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007321570082855584, + "loss": 3.1747, + "step": 2270 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007319256868225011, + "loss": 3.211, + "step": 2271 + }, + { + "epoch": 0.37, + "learning_rate": 0.000731694302092432, + "loss": 3.3152, + "step": 2272 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007314628541584703, + "loss": 3.3018, + "step": 2273 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007312313430837528, + "loss": 3.2751, + "step": 2274 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007309997689314333, + "loss": 3.3193, + "step": 2275 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007307681317646831, + "loss": 3.2053, + "step": 2276 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007305364316466902, + "loss": 3.313, + "step": 2277 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007303046686406606, + "loss": 3.4025, + "step": 2278 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007300728428098165, + "loss": 3.2414, + "step": 2279 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007298409542173979, + "loss": 3.2211, + "step": 2280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007296090029266613, + "loss": 3.2886, + "step": 2281 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007293769890008813, + "loss": 3.3432, + "step": 2282 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007291449125033485, + "loss": 3.3046, + "step": 2283 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007289127734973713, + "loss": 3.28, + "step": 2284 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007286805720462749, + "loss": 3.197, + "step": 2285 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007284483082134013, + "loss": 3.1771, + "step": 2286 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007282159820621101, + "loss": 3.2587, + "step": 2287 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007279835936557773, + "loss": 3.1347, + "step": 2288 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007277511430577961, + "loss": 3.2187, + "step": 2289 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007275186303315769, + "loss": 3.3697, + "step": 2290 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007272860555405468, + "loss": 3.3054, + "step": 2291 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007270534187481498, + "loss": 3.2354, + "step": 2292 + }, + { + "epoch": 0.37, + "learning_rate": 0.000726820720017847, + "loss": 3.3119, + "step": 2293 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007265879594131161, + "loss": 3.2453, + "step": 2294 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007263551369974522, + "loss": 3.3854, + "step": 2295 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007261222528343665, + "loss": 3.157, + "step": 2296 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007258893069873878, + "loss": 3.1915, + "step": 2297 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007256562995200614, + "loss": 3.2659, + "step": 2298 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007254232304959491, + "loss": 3.1568, + "step": 2299 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007251900999786303, + "loss": 3.2489, + "step": 2300 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007249569080317002, + "loss": 3.3377, + "step": 2301 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007247236547187715, + "loss": 3.2611, + "step": 2302 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007244903401034733, + "loss": 3.2788, + "step": 2303 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007242569642494516, + "loss": 3.3779, + "step": 2304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007240235272203689, + "loss": 3.2913, + "step": 2305 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007237900290799044, + "loss": 3.2563, + "step": 2306 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007235564698917542, + "loss": 3.1967, + "step": 2307 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007233228497196309, + "loss": 3.3739, + "step": 2308 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007230891686272636, + "loss": 3.3081, + "step": 2309 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007228554266783985, + "loss": 3.1338, + "step": 2310 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007226216239367977, + "loss": 3.1505, + "step": 2311 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007223877604662403, + "loss": 3.2499, + "step": 2312 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007221538363305223, + "loss": 3.2739, + "step": 2313 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007219198515934556, + "loss": 3.3302, + "step": 2314 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007216858063188687, + "loss": 3.4127, + "step": 2315 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007214517005706073, + "loss": 3.4279, + "step": 2316 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007212175344125327, + "loss": 3.2479, + "step": 2317 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007209833079085231, + "loss": 3.3085, + "step": 2318 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007207490211224736, + "loss": 3.2179, + "step": 2319 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007205146741182946, + "loss": 3.457, + "step": 2320 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007202802669599143, + "loss": 3.1284, + "step": 2321 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007200457997112761, + "loss": 3.2329, + "step": 2322 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007198112724363407, + "loss": 3.226, + "step": 2323 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007195766851990846, + "loss": 3.2632, + "step": 2324 + }, + { + "epoch": 0.37, + "learning_rate": 0.000719342038063501, + "loss": 3.2964, + "step": 2325 + }, + { + "epoch": 0.38, + "learning_rate": 0.000719107331093599, + "loss": 3.2298, + "step": 2326 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007188725643534047, + "loss": 3.1379, + "step": 2327 + }, + { + "epoch": 0.38, + "learning_rate": 0.00071863773790696, + "loss": 3.1072, + "step": 2328 + }, + { + "epoch": 0.38, + "learning_rate": 0.000718402851818323, + "loss": 3.2268, + "step": 2329 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007181679061515684, + "loss": 3.2713, + "step": 2330 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007179329009707872, + "loss": 3.2938, + "step": 2331 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007176978363400864, + "loss": 3.3029, + "step": 2332 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007174627123235891, + "loss": 3.2513, + "step": 2333 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007172275289854349, + "loss": 3.4377, + "step": 2334 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007169922863897795, + "loss": 3.3368, + "step": 2335 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007167569846007946, + "loss": 3.1661, + "step": 2336 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007165216236826684, + "loss": 3.3538, + "step": 2337 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007162862036996048, + "loss": 3.2639, + "step": 2338 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007160507247158243, + "loss": 3.2069, + "step": 2339 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007158151867955629, + "loss": 3.2416, + "step": 2340 + }, + { + "epoch": 0.38, + "learning_rate": 0.000715579590003073, + "loss": 3.1853, + "step": 2341 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007153439344026233, + "loss": 3.3641, + "step": 2342 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007151082200584982, + "loss": 3.173, + "step": 2343 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007148724470349981, + "loss": 3.2297, + "step": 2344 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007146366153964399, + "loss": 3.1906, + "step": 2345 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007144007252071555, + "loss": 3.227, + "step": 2346 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007141647765314938, + "loss": 3.3371, + "step": 2347 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007139287694338192, + "loss": 3.2033, + "step": 2348 + }, + { + "epoch": 0.38, + "learning_rate": 0.000713692703978512, + "loss": 3.2108, + "step": 2349 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007134565802299686, + "loss": 3.309, + "step": 2350 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007132203982526012, + "loss": 3.2436, + "step": 2351 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007129841581108376, + "loss": 3.4498, + "step": 2352 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007127478598691222, + "loss": 3.1987, + "step": 2353 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007125115035919147, + "loss": 3.2257, + "step": 2354 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007122750893436904, + "loss": 3.2829, + "step": 2355 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007120386171889409, + "loss": 3.4217, + "step": 2356 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007118020871921736, + "loss": 3.211, + "step": 2357 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007115654994179115, + "loss": 3.4212, + "step": 2358 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007113288539306932, + "loss": 3.2172, + "step": 2359 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007110921507950733, + "loss": 3.2645, + "step": 2360 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007108553900756222, + "loss": 3.2434, + "step": 2361 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007106185718369258, + "loss": 3.3187, + "step": 2362 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007103816961435858, + "loss": 3.3843, + "step": 2363 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007101447630602192, + "loss": 3.2455, + "step": 2364 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007099077726514592, + "loss": 3.2321, + "step": 2365 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007096707249819545, + "loss": 3.2395, + "step": 2366 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007094336201163692, + "loss": 3.1378, + "step": 2367 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007091964581193833, + "loss": 3.2701, + "step": 2368 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007089592390556919, + "loss": 3.2501, + "step": 2369 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007087219629900066, + "loss": 3.1448, + "step": 2370 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007084846299870532, + "loss": 3.2279, + "step": 2371 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007082472401115742, + "loss": 3.2373, + "step": 2372 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007080097934283274, + "loss": 3.1273, + "step": 2373 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007077722900020853, + "loss": 3.2881, + "step": 2374 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007075347298976369, + "loss": 3.2207, + "step": 2375 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007072971131797861, + "loss": 3.4982, + "step": 2376 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007070594399133524, + "loss": 3.1521, + "step": 2377 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007068217101631705, + "loss": 3.2129, + "step": 2378 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007065839239940911, + "loss": 3.2518, + "step": 2379 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007063460814709795, + "loss": 3.3066, + "step": 2380 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007061081826587169, + "loss": 3.2251, + "step": 2381 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007058702276221998, + "loss": 3.2024, + "step": 2382 + }, + { + "epoch": 0.38, + "learning_rate": 0.00070563221642634, + "loss": 3.3269, + "step": 2383 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007053941491360642, + "loss": 3.1851, + "step": 2384 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007051560258163152, + "loss": 3.1468, + "step": 2385 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007049178465320506, + "loss": 3.2766, + "step": 2386 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007046796113482431, + "loss": 3.2584, + "step": 2387 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007044413203298812, + "loss": 3.2294, + "step": 2388 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007042029735419681, + "loss": 3.3003, + "step": 2389 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007039645710495224, + "loss": 3.2018, + "step": 2390 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007037261129175781, + "loss": 3.1521, + "step": 2391 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007034875992111839, + "loss": 3.3059, + "step": 2392 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007032490299954041, + "loss": 3.2848, + "step": 2393 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007030104053353184, + "loss": 3.2664, + "step": 2394 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007027717252960205, + "loss": 3.318, + "step": 2395 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007025329899426205, + "loss": 3.2827, + "step": 2396 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007022941993402428, + "loss": 3.1678, + "step": 2397 + }, + { + "epoch": 0.39, + "learning_rate": 0.000702055353554027, + "loss": 3.2703, + "step": 2398 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007018164526491281, + "loss": 3.1449, + "step": 2399 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007015774966907157, + "loss": 3.2486, + "step": 2400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007013384857439746, + "loss": 3.2396, + "step": 2401 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007010994198741046, + "loss": 3.2147, + "step": 2402 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007008602991463207, + "loss": 3.3983, + "step": 2403 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007006211236258523, + "loss": 3.1631, + "step": 2404 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007003818933779444, + "loss": 3.1418, + "step": 2405 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007001426084678563, + "loss": 3.2172, + "step": 2406 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006999032689608629, + "loss": 3.1305, + "step": 2407 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006996638749222534, + "loss": 3.2526, + "step": 2408 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006994244264173321, + "loss": 3.2125, + "step": 2409 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006991849235114183, + "loss": 3.3355, + "step": 2410 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006989453662698458, + "loss": 3.2209, + "step": 2411 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006987057547579636, + "loss": 3.2027, + "step": 2412 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006984660890411353, + "loss": 3.131, + "step": 2413 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006982263691847393, + "loss": 3.2785, + "step": 2414 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006979865952541687, + "loss": 3.2359, + "step": 2415 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006977467673148315, + "loss": 3.2334, + "step": 2416 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006975068854321504, + "loss": 3.3546, + "step": 2417 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697266949671563, + "loss": 3.2672, + "step": 2418 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006970269600985208, + "loss": 3.2393, + "step": 2419 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006967869167784909, + "loss": 3.2753, + "step": 2420 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006965468197769547, + "loss": 3.321, + "step": 2421 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006963066691594084, + "loss": 3.1718, + "step": 2422 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006960664649913628, + "loss": 3.1256, + "step": 2423 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006958262073383424, + "loss": 3.1845, + "step": 2424 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006955858962658881, + "loss": 3.2462, + "step": 2425 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006953455318395538, + "loss": 3.3439, + "step": 2426 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006951051141249086, + "loss": 3.2748, + "step": 2427 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006948646431875362, + "loss": 3.0447, + "step": 2428 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006946241190930345, + "loss": 3.4248, + "step": 2429 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006943835419070161, + "loss": 3.3493, + "step": 2430 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006941429116951081, + "loss": 3.1887, + "step": 2431 + }, + { + "epoch": 0.39, + "learning_rate": 0.000693902228522952, + "loss": 3.2811, + "step": 2432 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006936614924562038, + "loss": 3.1866, + "step": 2433 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006934207035605338, + "loss": 3.0892, + "step": 2434 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006931798619016269, + "loss": 3.2421, + "step": 2435 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006929389675451823, + "loss": 3.3325, + "step": 2436 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006926980205569135, + "loss": 3.3181, + "step": 2437 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006924570210025485, + "loss": 3.1922, + "step": 2438 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006922159689478297, + "loss": 3.0082, + "step": 2439 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006919748644585133, + "loss": 3.1829, + "step": 2440 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006917337076003706, + "loss": 3.1297, + "step": 2441 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006914924984391866, + "loss": 3.1293, + "step": 2442 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006912512370407608, + "loss": 3.2214, + "step": 2443 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006910099234709069, + "loss": 3.2349, + "step": 2444 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006907685577954528, + "loss": 3.2974, + "step": 2445 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006905271400802405, + "loss": 3.3025, + "step": 2446 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006902856703911266, + "loss": 3.2207, + "step": 2447 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006900441487939817, + "loss": 3.1736, + "step": 2448 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006898025753546902, + "loss": 3.2478, + "step": 2449 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006895609501391509, + "loss": 3.3099, + "step": 2450 + }, + { + "epoch": 0.4, + "learning_rate": 0.000689319273213277, + "loss": 3.3642, + "step": 2451 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006890775446429955, + "loss": 3.2618, + "step": 2452 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006888357644942472, + "loss": 3.0745, + "step": 2453 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006885939328329878, + "loss": 3.2057, + "step": 2454 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006883520497251861, + "loss": 3.2935, + "step": 2455 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006881101152368259, + "loss": 3.366, + "step": 2456 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006878681294339042, + "loss": 3.2241, + "step": 2457 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006876260923824322, + "loss": 3.2848, + "step": 2458 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006873840041484356, + "loss": 3.275, + "step": 2459 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006871418647979532, + "loss": 3.1394, + "step": 2460 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006868996743970386, + "loss": 3.2407, + "step": 2461 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006866574330117585, + "loss": 3.1837, + "step": 2462 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006864151407081943, + "loss": 3.2849, + "step": 2463 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006861727975524407, + "loss": 3.2006, + "step": 2464 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006859304036106067, + "loss": 3.2295, + "step": 2465 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006856879589488147, + "loss": 3.3242, + "step": 2466 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006854454636332014, + "loss": 3.1532, + "step": 2467 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006852029177299169, + "loss": 3.2337, + "step": 2468 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006849603213051255, + "loss": 3.3271, + "step": 2469 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006847176744250049, + "loss": 3.2234, + "step": 2470 + }, + { + "epoch": 0.4, + "learning_rate": 0.000684474977155747, + "loss": 3.2485, + "step": 2471 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006842322295635569, + "loss": 3.279, + "step": 2472 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006839894317146538, + "loss": 3.0611, + "step": 2473 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006837465836752706, + "loss": 3.2719, + "step": 2474 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006835036855116539, + "loss": 3.2059, + "step": 2475 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006832607372900636, + "loss": 3.2122, + "step": 2476 + }, + { + "epoch": 0.4, + "learning_rate": 0.000683017739076774, + "loss": 3.3313, + "step": 2477 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006827746909380722, + "loss": 3.074, + "step": 2478 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006825315929402594, + "loss": 3.2372, + "step": 2479 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006822884451496505, + "loss": 3.153, + "step": 2480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006820452476325733, + "loss": 3.2201, + "step": 2481 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006818020004553702, + "loss": 3.2218, + "step": 2482 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006815587036843964, + "loss": 3.2461, + "step": 2483 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006813153573860209, + "loss": 3.3799, + "step": 2484 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006810719616266259, + "loss": 3.4, + "step": 2485 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006808285164726076, + "loss": 3.1708, + "step": 2486 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006805850219903751, + "loss": 3.2029, + "step": 2487 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006803414782463516, + "loss": 3.131, + "step": 2488 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006800978853069733, + "loss": 3.3197, + "step": 2489 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006798542432386897, + "loss": 3.3108, + "step": 2490 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006796105521079643, + "loss": 3.1706, + "step": 2491 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006793668119812735, + "loss": 3.3288, + "step": 2492 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006791230229251067, + "loss": 3.2508, + "step": 2493 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006788791850059679, + "loss": 3.1876, + "step": 2494 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006786352982903732, + "loss": 3.2854, + "step": 2495 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006783913628448523, + "loss": 3.3168, + "step": 2496 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006781473787359488, + "loss": 3.2681, + "step": 2497 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006779033460302189, + "loss": 3.2489, + "step": 2498 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006776592647942322, + "loss": 3.0535, + "step": 2499 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006774151350945717, + "loss": 3.2502, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006771709569978337, + "loss": 3.1901, + "step": 2501 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006769267305706272, + "loss": 3.1329, + "step": 2502 + }, + { + "epoch": 0.4, + "learning_rate": 0.000676682455879575, + "loss": 3.2133, + "step": 2503 + }, + { + "epoch": 0.4, + "learning_rate": 0.000676438132991313, + "loss": 3.1973, + "step": 2504 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006761937619724894, + "loss": 3.3134, + "step": 2505 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006759493428897667, + "loss": 3.3521, + "step": 2506 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006757048758098199, + "loss": 3.2364, + "step": 2507 + }, + { + "epoch": 0.4, + "learning_rate": 0.000675460360799337, + "loss": 3.3886, + "step": 2508 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006752157979250195, + "loss": 3.308, + "step": 2509 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006749711872535816, + "loss": 3.0457, + "step": 2510 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006747265288517505, + "loss": 3.2852, + "step": 2511 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006744818227862668, + "loss": 3.2673, + "step": 2512 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006742370691238836, + "loss": 3.2516, + "step": 2513 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006739922679313677, + "loss": 3.2373, + "step": 2514 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006737474192754979, + "loss": 3.2346, + "step": 2515 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006735025232230668, + "loss": 3.0965, + "step": 2516 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006732575798408797, + "loss": 3.2551, + "step": 2517 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006730125891957543, + "loss": 3.1609, + "step": 2518 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006727675513545219, + "loss": 3.1434, + "step": 2519 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006725224663840264, + "loss": 3.3367, + "step": 2520 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006722773343511244, + "loss": 3.1853, + "step": 2521 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006720321553226854, + "loss": 3.3621, + "step": 2522 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006717869293655919, + "loss": 3.2767, + "step": 2523 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006715416565467392, + "loss": 3.3447, + "step": 2524 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006712963369330351, + "loss": 3.1636, + "step": 2525 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006710509705914005, + "loss": 3.2909, + "step": 2526 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006708055575887687, + "loss": 3.2877, + "step": 2527 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006705600979920861, + "loss": 3.3157, + "step": 2528 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006703145918683116, + "loss": 3.2956, + "step": 2529 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006700690392844166, + "loss": 3.3955, + "step": 2530 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006698234403073856, + "loss": 3.3652, + "step": 2531 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006695777950042155, + "loss": 3.3426, + "step": 2532 + }, + { + "epoch": 0.41, + "learning_rate": 0.000669332103441916, + "loss": 3.336, + "step": 2533 + }, + { + "epoch": 0.41, + "learning_rate": 0.000669086365687509, + "loss": 3.2321, + "step": 2534 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006688405818080298, + "loss": 3.3056, + "step": 2535 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006685947518705253, + "loss": 3.1641, + "step": 2536 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006683488759420555, + "loss": 3.2236, + "step": 2537 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006681029540896934, + "loss": 3.2956, + "step": 2538 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006678569863805234, + "loss": 3.2407, + "step": 2539 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006676109728816434, + "loss": 3.1761, + "step": 2540 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006673649136601634, + "loss": 3.2503, + "step": 2541 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006671188087832056, + "loss": 3.1361, + "step": 2542 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006668726583179054, + "loss": 3.2491, + "step": 2543 + }, + { + "epoch": 0.41, + "learning_rate": 0.00066662646233141, + "loss": 3.3543, + "step": 2544 + }, + { + "epoch": 0.41, + "learning_rate": 0.000666380220890879, + "loss": 3.165, + "step": 2545 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006661339340634848, + "loss": 3.2246, + "step": 2546 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006658876019164122, + "loss": 3.1885, + "step": 2547 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006656412245168578, + "loss": 3.1899, + "step": 2548 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006653948019320309, + "loss": 3.3509, + "step": 2549 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006651483342291535, + "loss": 3.2327, + "step": 2550 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006649018214754591, + "loss": 3.2743, + "step": 2551 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006646552637381942, + "loss": 3.316, + "step": 2552 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006644086610846171, + "loss": 3.2112, + "step": 2553 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006641620135819985, + "loss": 3.1752, + "step": 2554 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006639153212976217, + "loss": 3.2628, + "step": 2555 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006636685842987816, + "loss": 3.3566, + "step": 2556 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006634218026527856, + "loss": 3.2893, + "step": 2557 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006631749764269536, + "loss": 3.2105, + "step": 2558 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006629281056886167, + "loss": 3.2792, + "step": 2559 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006626811905051194, + "loss": 3.292, + "step": 2560 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006624342309438176, + "loss": 3.3569, + "step": 2561 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006621872270720789, + "loss": 3.0615, + "step": 2562 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006619401789572841, + "loss": 3.3385, + "step": 2563 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006616930866668252, + "loss": 3.3599, + "step": 2564 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006614459502681062, + "loss": 3.1784, + "step": 2565 + }, + { + "epoch": 0.41, + "learning_rate": 0.000661198769828544, + "loss": 3.2052, + "step": 2566 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006609515454155668, + "loss": 3.1359, + "step": 2567 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006607042770966147, + "loss": 3.1847, + "step": 2568 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006604569649391402, + "loss": 3.2265, + "step": 2569 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006602096090106077, + "loss": 3.2389, + "step": 2570 + }, + { + "epoch": 0.41, + "learning_rate": 0.000659962209378493, + "loss": 3.232, + "step": 2571 + }, + { + "epoch": 0.41, + "learning_rate": 0.000659714766110285, + "loss": 3.3135, + "step": 2572 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006594672792734831, + "loss": 3.3107, + "step": 2573 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006592197489355993, + "loss": 3.2807, + "step": 2574 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006589721751641578, + "loss": 3.3102, + "step": 2575 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006587245580266937, + "loss": 3.2197, + "step": 2576 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006584768975907552, + "loss": 3.3875, + "step": 2577 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006582291939239008, + "loss": 3.2672, + "step": 2578 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006579814470937021, + "loss": 3.2274, + "step": 2579 + }, + { + "epoch": 0.42, + "learning_rate": 0.000657733657167742, + "loss": 3.2129, + "step": 2580 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006574858242136146, + "loss": 3.3573, + "step": 2581 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006572379482989269, + "loss": 3.3165, + "step": 2582 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006569900294912968, + "loss": 3.2058, + "step": 2583 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006567420678583536, + "loss": 3.2085, + "step": 2584 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006564940634677392, + "loss": 3.2628, + "step": 2585 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006562460163871068, + "loss": 3.3337, + "step": 2586 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006559979266841209, + "loss": 3.1905, + "step": 2587 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006557497944264578, + "loss": 3.2537, + "step": 2588 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006555016196818062, + "loss": 3.3512, + "step": 2589 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006552534025178647, + "loss": 3.2943, + "step": 2590 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006550051430023452, + "loss": 3.2082, + "step": 2591 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006547568412029701, + "loss": 3.1531, + "step": 2592 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006545084971874737, + "loss": 3.1906, + "step": 2593 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006542601110236018, + "loss": 3.2806, + "step": 2594 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006540116827791118, + "loss": 3.3476, + "step": 2595 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006537632125217722, + "loss": 3.2024, + "step": 2596 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006535147003193634, + "loss": 3.2417, + "step": 2597 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006532661462396768, + "loss": 3.2972, + "step": 2598 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006530175503505157, + "loss": 3.1001, + "step": 2599 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006527689127196945, + "loss": 3.2191, + "step": 2600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006525202334150392, + "loss": 3.3669, + "step": 2601 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006522715125043868, + "loss": 3.3027, + "step": 2602 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006520227500555858, + "loss": 3.2313, + "step": 2603 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006517739461364965, + "loss": 3.3859, + "step": 2604 + }, + { + "epoch": 0.42, + "learning_rate": 0.00065152510081499, + "loss": 3.1639, + "step": 2605 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006512762141589486, + "loss": 3.175, + "step": 2606 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006510272862362662, + "loss": 3.1064, + "step": 2607 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006507783171148479, + "loss": 3.3029, + "step": 2608 + }, + { + "epoch": 0.42, + "learning_rate": 0.00065052930686261, + "loss": 3.2576, + "step": 2609 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006502802555474801, + "loss": 3.2599, + "step": 2610 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006500311632373967, + "loss": 3.1116, + "step": 2611 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006497820300003098, + "loss": 3.2654, + "step": 2612 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006495328559041805, + "loss": 3.166, + "step": 2613 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006492836410169809, + "loss": 3.3195, + "step": 2614 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006490343854066945, + "loss": 3.186, + "step": 2615 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006487850891413157, + "loss": 3.2709, + "step": 2616 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006485357522888496, + "loss": 3.2833, + "step": 2617 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006482863749173134, + "loss": 3.1862, + "step": 2618 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006480369570947347, + "loss": 3.2558, + "step": 2619 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006477874988891518, + "loss": 3.2192, + "step": 2620 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006475380003686147, + "loss": 3.3221, + "step": 2621 + }, + { + "epoch": 0.42, + "learning_rate": 0.000647288461601184, + "loss": 3.1782, + "step": 2622 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006470388826549314, + "loss": 3.3355, + "step": 2623 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006467892635979396, + "loss": 3.0389, + "step": 2624 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006465396044983023, + "loss": 3.3462, + "step": 2625 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006462899054241237, + "loss": 3.2048, + "step": 2626 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006460401664435195, + "loss": 3.2257, + "step": 2627 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006457903876246156, + "loss": 3.2329, + "step": 2628 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006455405690355497, + "loss": 3.2402, + "step": 2629 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006452907107444696, + "loss": 3.0645, + "step": 2630 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006450408128195338, + "loss": 3.3536, + "step": 2631 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006447908753289127, + "loss": 3.0372, + "step": 2632 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006445408983407861, + "loss": 3.1113, + "step": 2633 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006442908819233453, + "loss": 3.1191, + "step": 2634 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006440408261447927, + "loss": 3.2081, + "step": 2635 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006437907310733406, + "loss": 3.3645, + "step": 2636 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006435405967772126, + "loss": 3.2132, + "step": 2637 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006432904233246428, + "loss": 3.2448, + "step": 2638 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006430402107838762, + "loss": 3.1957, + "step": 2639 + }, + { + "epoch": 0.43, + "learning_rate": 0.000642789959223168, + "loss": 3.1748, + "step": 2640 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006425396687107845, + "loss": 3.1541, + "step": 2641 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006422893393150024, + "loss": 3.1863, + "step": 2642 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006420389711041091, + "loss": 3.1469, + "step": 2643 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006417885641464026, + "loss": 3.1419, + "step": 2644 + }, + { + "epoch": 0.43, + "learning_rate": 0.000641538118510191, + "loss": 3.238, + "step": 2645 + }, + { + "epoch": 0.43, + "learning_rate": 0.000641287634263794, + "loss": 3.1513, + "step": 2646 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006410371114755409, + "loss": 3.3119, + "step": 2647 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006407865502137717, + "loss": 3.2069, + "step": 2648 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006405359505468372, + "loss": 3.3763, + "step": 2649 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006402853125430985, + "loss": 3.2821, + "step": 2650 + }, + { + "epoch": 0.43, + "learning_rate": 0.000640034636270927, + "loss": 3.2266, + "step": 2651 + }, + { + "epoch": 0.43, + "learning_rate": 0.000639783921798705, + "loss": 3.1684, + "step": 2652 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006395331691948243, + "loss": 3.119, + "step": 2653 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006392823785276882, + "loss": 3.2322, + "step": 2654 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006390315498657098, + "loss": 3.2483, + "step": 2655 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006387806832773125, + "loss": 3.3167, + "step": 2656 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006385297788309303, + "loss": 3.1301, + "step": 2657 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006382788365950076, + "loss": 3.3424, + "step": 2658 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006380278566379987, + "loss": 3.2576, + "step": 2659 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006377768390283683, + "loss": 3.2985, + "step": 2660 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006375257838345921, + "loss": 3.1934, + "step": 2661 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006372746911251548, + "loss": 3.1864, + "step": 2662 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006370235609685522, + "loss": 3.3021, + "step": 2663 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006367723934332903, + "loss": 3.4077, + "step": 2664 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006365211885878848, + "loss": 3.3459, + "step": 2665 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006362699465008619, + "loss": 3.2732, + "step": 2666 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006360186672407583, + "loss": 3.3125, + "step": 2667 + }, + { + "epoch": 0.43, + "learning_rate": 0.00063576735087612, + "loss": 3.2195, + "step": 2668 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006355159974755039, + "loss": 3.2436, + "step": 2669 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006352646071074767, + "loss": 3.2048, + "step": 2670 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006350131798406149, + "loss": 3.2943, + "step": 2671 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006347617157435057, + "loss": 3.2185, + "step": 2672 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006345102148847458, + "loss": 3.2188, + "step": 2673 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006342586773329421, + "loss": 3.2676, + "step": 2674 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006340071031567115, + "loss": 3.117, + "step": 2675 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006337554924246812, + "loss": 3.3023, + "step": 2676 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006335038452054877, + "loss": 3.258, + "step": 2677 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006332521615677782, + "loss": 3.345, + "step": 2678 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006330004415802095, + "loss": 3.0818, + "step": 2679 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006327486853114478, + "loss": 3.3204, + "step": 2680 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006324968928301702, + "loss": 3.3297, + "step": 2681 + }, + { + "epoch": 0.43, + "learning_rate": 0.000632245064205063, + "loss": 3.0557, + "step": 2682 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006319931995048226, + "loss": 3.208, + "step": 2683 + }, + { + "epoch": 0.43, + "learning_rate": 0.000631741298798155, + "loss": 3.3319, + "step": 2684 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006314893621537765, + "loss": 3.4352, + "step": 2685 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006312373896404126, + "loss": 2.9215, + "step": 2686 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006309853813267989, + "loss": 3.3112, + "step": 2687 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006307333372816811, + "loss": 3.203, + "step": 2688 + }, + { + "epoch": 0.43, + "learning_rate": 0.000630481257573814, + "loss": 3.2017, + "step": 2689 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006302291422719623, + "loss": 3.2631, + "step": 2690 + }, + { + "epoch": 0.43, + "learning_rate": 0.000629976991444901, + "loss": 3.3584, + "step": 2691 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006297248051614137, + "loss": 3.1867, + "step": 2692 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006294725834902948, + "loss": 3.1731, + "step": 2693 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006292203265003479, + "loss": 3.1717, + "step": 2694 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006289680342603858, + "loss": 3.2928, + "step": 2695 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006287157068392315, + "loss": 3.2615, + "step": 2696 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006284633443057175, + "loss": 3.2823, + "step": 2697 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006282109467286856, + "loss": 3.3115, + "step": 2698 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006279585141769874, + "loss": 3.3554, + "step": 2699 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006277060467194841, + "loss": 3.3139, + "step": 2700 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006274535444250462, + "loss": 3.2183, + "step": 2701 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006272010073625538, + "loss": 3.2839, + "step": 2702 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006269484356008963, + "loss": 3.2324, + "step": 2703 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006266958292089732, + "loss": 3.1865, + "step": 2704 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006264431882556927, + "loss": 3.1163, + "step": 2705 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006261905128099726, + "loss": 3.2799, + "step": 2706 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006259378029407406, + "loss": 3.2576, + "step": 2707 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006256850587169334, + "loss": 3.2953, + "step": 2708 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006254322802074965, + "loss": 3.2036, + "step": 2709 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006251794674813862, + "loss": 3.2261, + "step": 2710 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006249266206075668, + "loss": 3.2979, + "step": 2711 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006246737396550124, + "loss": 3.1635, + "step": 2712 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006244208246927066, + "loss": 3.2663, + "step": 2713 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006241678757896419, + "loss": 3.3069, + "step": 2714 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006239148930148203, + "loss": 3.2022, + "step": 2715 + }, + { + "epoch": 0.44, + "learning_rate": 0.000623661876437253, + "loss": 3.3226, + "step": 2716 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006234088261259604, + "loss": 3.1836, + "step": 2717 + }, + { + "epoch": 0.44, + "learning_rate": 0.000623155742149972, + "loss": 3.476, + "step": 2718 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006229026245783269, + "loss": 3.2331, + "step": 2719 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006226494734800725, + "loss": 3.2609, + "step": 2720 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006223962889242663, + "loss": 3.282, + "step": 2721 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006221430709799745, + "loss": 3.2676, + "step": 2722 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006218898197162723, + "loss": 3.3403, + "step": 2723 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006216365352022441, + "loss": 3.1192, + "step": 2724 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006213832175069837, + "loss": 3.2273, + "step": 2725 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006211298666995932, + "loss": 3.1297, + "step": 2726 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006208764828491844, + "loss": 3.3427, + "step": 2727 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006206230660248777, + "loss": 3.3057, + "step": 2728 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006203696162958029, + "loss": 3.1609, + "step": 2729 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006201161337310986, + "loss": 3.2403, + "step": 2730 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006198626183999121, + "loss": 3.1572, + "step": 2731 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006196090703713998, + "loss": 3.2182, + "step": 2732 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006193554897147275, + "loss": 3.2134, + "step": 2733 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006191018764990689, + "loss": 3.4599, + "step": 2734 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006188482307936076, + "loss": 3.1646, + "step": 2735 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006185945526675355, + "loss": 3.2858, + "step": 2736 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006183408421900533, + "loss": 3.2918, + "step": 2737 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006180870994303707, + "loss": 3.2482, + "step": 2738 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006178333244577066, + "loss": 3.371, + "step": 2739 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006175795173412877, + "loss": 3.418, + "step": 2740 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006173256781503505, + "loss": 3.0703, + "step": 2741 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006170718069541395, + "loss": 3.3643, + "step": 2742 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006168179038219083, + "loss": 3.1886, + "step": 2743 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006165639688229193, + "loss": 3.1264, + "step": 2744 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006163100020264434, + "loss": 3.1647, + "step": 2745 + }, + { + "epoch": 0.44, + "learning_rate": 0.00061605600350176, + "loss": 3.3302, + "step": 2746 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006158019733181573, + "loss": 3.3022, + "step": 2747 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006155479115449328, + "loss": 3.1757, + "step": 2748 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006152938182513912, + "loss": 3.3729, + "step": 2749 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006150396935068471, + "loss": 3.251, + "step": 2750 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006147855373806233, + "loss": 3.1539, + "step": 2751 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006145313499420506, + "loss": 3.2478, + "step": 2752 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006142771312604692, + "loss": 3.2511, + "step": 2753 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006140228814052273, + "loss": 3.2792, + "step": 2754 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006137686004456816, + "loss": 3.1717, + "step": 2755 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006135142884511975, + "loss": 3.2796, + "step": 2756 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006132599454911489, + "loss": 3.179, + "step": 2757 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006130055716349178, + "loss": 3.2311, + "step": 2758 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006127511669518949, + "loss": 3.1269, + "step": 2759 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006124967315114795, + "loss": 3.1684, + "step": 2760 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006122422653830786, + "loss": 3.2389, + "step": 2761 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006119877686361084, + "loss": 3.1949, + "step": 2762 + }, + { + "epoch": 0.45, + "learning_rate": 0.000611733241339993, + "loss": 3.211, + "step": 2763 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006114786835641647, + "loss": 3.2839, + "step": 2764 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006112240953780644, + "loss": 3.4735, + "step": 2765 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006109694768511414, + "loss": 3.2524, + "step": 2766 + }, + { + "epoch": 0.45, + "learning_rate": 0.000610714828052853, + "loss": 3.2904, + "step": 2767 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006104601490526649, + "loss": 3.2764, + "step": 2768 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006102054399200507, + "loss": 3.1725, + "step": 2769 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006099507007244929, + "loss": 3.2126, + "step": 2770 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006096959315354815, + "loss": 3.263, + "step": 2771 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006094411324225152, + "loss": 3.1602, + "step": 2772 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006091863034551006, + "loss": 3.2716, + "step": 2773 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006089314447027525, + "loss": 3.1168, + "step": 2774 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006086765562349937, + "loss": 3.2818, + "step": 2775 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006084216381213553, + "loss": 3.1495, + "step": 2776 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006081666904313766, + "loss": 3.2694, + "step": 2777 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006079117132346043, + "loss": 3.2119, + "step": 2778 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006076567066005944, + "loss": 3.2002, + "step": 2779 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006074016705989094, + "loss": 3.2411, + "step": 2780 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006071466052991209, + "loss": 3.1096, + "step": 2781 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006068915107708084, + "loss": 3.2159, + "step": 2782 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006066363870835587, + "loss": 3.3051, + "step": 2783 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006063812343069672, + "loss": 3.236, + "step": 2784 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006061260525106372, + "loss": 3.0784, + "step": 2785 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006058708417641795, + "loss": 3.2405, + "step": 2786 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006056156021372131, + "loss": 3.3285, + "step": 2787 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006053603336993649, + "loss": 3.2654, + "step": 2788 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006051050365202695, + "loss": 3.3196, + "step": 2789 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006048497106695694, + "loss": 3.1975, + "step": 2790 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006045943562169151, + "loss": 3.2366, + "step": 2791 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006043389732319645, + "loss": 3.4659, + "step": 2792 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006040835617843836, + "loss": 3.2157, + "step": 2793 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006038281219438463, + "loss": 3.3577, + "step": 2794 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006035726537800339, + "loss": 3.3213, + "step": 2795 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006033171573626355, + "loss": 3.1815, + "step": 2796 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006030616327613482, + "loss": 3.2646, + "step": 2797 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006028060800458762, + "loss": 3.2252, + "step": 2798 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006025504992859321, + "loss": 3.1211, + "step": 2799 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006022948905512355, + "loss": 3.3619, + "step": 2800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006020392539115138, + "loss": 3.089, + "step": 2801 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006017835894365027, + "loss": 3.2625, + "step": 2802 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006015278971959444, + "loss": 3.1854, + "step": 2803 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006012721772595893, + "loss": 3.2827, + "step": 2804 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006010164296971953, + "loss": 3.3445, + "step": 2805 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006007606545785278, + "loss": 3.2105, + "step": 2806 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006005048519733596, + "loss": 3.2327, + "step": 2807 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006002490219514712, + "loss": 3.3197, + "step": 2808 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005999931645826505, + "loss": 3.1442, + "step": 2809 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005997372799366927, + "loss": 3.2893, + "step": 2810 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005994813680834008, + "loss": 3.1903, + "step": 2811 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005992254290925847, + "loss": 3.3819, + "step": 2812 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005989694630340622, + "loss": 3.248, + "step": 2813 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005987134699776583, + "loss": 3.1616, + "step": 2814 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005984574499932052, + "loss": 3.2481, + "step": 2815 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005982014031505427, + "loss": 3.1431, + "step": 2816 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005979453295195179, + "loss": 3.256, + "step": 2817 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005976892291699848, + "loss": 3.2701, + "step": 2818 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005974331021718056, + "loss": 3.3866, + "step": 2819 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005971769485948488, + "loss": 3.1709, + "step": 2820 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005969207685089901, + "loss": 3.2827, + "step": 2821 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005966645619841139, + "loss": 3.3235, + "step": 2822 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005964083290901101, + "loss": 3.3103, + "step": 2823 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005961520698968766, + "loss": 3.4071, + "step": 2824 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005958957844743182, + "loss": 3.3132, + "step": 2825 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005956394728923474, + "loss": 3.2692, + "step": 2826 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005953831352208831, + "loss": 3.2395, + "step": 2827 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005951267715298516, + "loss": 3.3205, + "step": 2828 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005948703818891869, + "loss": 3.2166, + "step": 2829 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005946139663688288, + "loss": 3.2329, + "step": 2830 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005943575250387253, + "loss": 3.3753, + "step": 2831 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005941010579688312, + "loss": 3.2393, + "step": 2832 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005938445652291078, + "loss": 3.2916, + "step": 2833 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005935880468895239, + "loss": 3.2894, + "step": 2834 + }, + { + "epoch": 0.46, + "learning_rate": 0.000593331503020055, + "loss": 3.1294, + "step": 2835 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005930749336906841, + "loss": 3.1199, + "step": 2836 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005928183389714004, + "loss": 3.3385, + "step": 2837 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005925617189322004, + "loss": 3.154, + "step": 2838 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005923050736430876, + "loss": 3.1568, + "step": 2839 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005920484031740722, + "loss": 3.242, + "step": 2840 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005917917075951714, + "loss": 3.1087, + "step": 2841 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005915349869764092, + "loss": 3.2438, + "step": 2842 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005912782413878164, + "loss": 3.3033, + "step": 2843 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005910214708994306, + "loss": 3.411, + "step": 2844 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005907646755812963, + "loss": 3.2507, + "step": 2845 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005905078555034645, + "loss": 3.2788, + "step": 2846 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005902510107359934, + "loss": 3.1932, + "step": 2847 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005899941413489477, + "loss": 3.2109, + "step": 2848 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005897372474123985, + "loss": 3.3343, + "step": 2849 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005894803289964242, + "loss": 3.2959, + "step": 2850 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005892233861711094, + "loss": 3.2676, + "step": 2851 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005889664190065456, + "loss": 3.3498, + "step": 2852 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005887094275728309, + "loss": 3.2089, + "step": 2853 + }, + { + "epoch": 0.46, + "learning_rate": 0.00058845241194007, + "loss": 3.2367, + "step": 2854 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005881953721783743, + "loss": 3.09, + "step": 2855 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005879383083578614, + "loss": 3.234, + "step": 2856 + }, + { + "epoch": 0.46, + "learning_rate": 0.000587681220548656, + "loss": 3.3249, + "step": 2857 + }, + { + "epoch": 0.46, + "learning_rate": 0.000587424108820889, + "loss": 3.1046, + "step": 2858 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005871669732446978, + "loss": 3.2977, + "step": 2859 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005869098138902265, + "loss": 3.1625, + "step": 2860 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005866526308276258, + "loss": 3.2562, + "step": 2861 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005863954241270524, + "loss": 3.3571, + "step": 2862 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005861381938586699, + "loss": 3.3367, + "step": 2863 + }, + { + "epoch": 0.46, + "learning_rate": 0.000585880940092648, + "loss": 3.1936, + "step": 2864 + }, + { + "epoch": 0.46, + "learning_rate": 0.000585623662899163, + "loss": 3.2304, + "step": 2865 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005853663623483977, + "loss": 3.2672, + "step": 2866 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005851090385105409, + "loss": 3.0635, + "step": 2867 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005848516914557881, + "loss": 3.2419, + "step": 2868 + }, + { + "epoch": 0.46, + "learning_rate": 0.000584594321254341, + "loss": 3.284, + "step": 2869 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005843369279764076, + "loss": 3.113, + "step": 2870 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005840795116922022, + "loss": 3.2406, + "step": 2871 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005838220724719454, + "loss": 3.2539, + "step": 2872 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005835646103858641, + "loss": 3.2641, + "step": 2873 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005833071255041914, + "loss": 3.1373, + "step": 2874 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005830496178971665, + "loss": 3.2945, + "step": 2875 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005827920876350349, + "loss": 3.3018, + "step": 2876 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005825345347880484, + "loss": 3.1548, + "step": 2877 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005822769594264649, + "loss": 3.4063, + "step": 2878 + }, + { + "epoch": 0.46, + "learning_rate": 0.000582019361620548, + "loss": 3.2583, + "step": 2879 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005817617414405684, + "loss": 3.3856, + "step": 2880 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005815040989568018, + "loss": 3.1103, + "step": 2881 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005812464342395307, + "loss": 3.1975, + "step": 2882 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005809887473590435, + "loss": 3.3603, + "step": 2883 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005807310383856345, + "loss": 3.2184, + "step": 2884 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005804733073896044, + "loss": 3.2268, + "step": 2885 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005802155544412593, + "loss": 3.191, + "step": 2886 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005799577796109117, + "loss": 3.1723, + "step": 2887 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005796999829688802, + "loss": 3.0655, + "step": 2888 + }, + { + "epoch": 0.47, + "learning_rate": 0.000579442164585489, + "loss": 3.1588, + "step": 2889 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005791843245310683, + "loss": 3.3678, + "step": 2890 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005789264628759545, + "loss": 3.0815, + "step": 2891 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005786685796904897, + "loss": 3.2768, + "step": 2892 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005784106750450215, + "loss": 3.391, + "step": 2893 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005781527490099042, + "loss": 3.2984, + "step": 2894 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005778948016554971, + "loss": 3.1969, + "step": 2895 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005776368330521656, + "loss": 3.2379, + "step": 2896 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005773788432702814, + "loss": 3.3042, + "step": 2897 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005771208323802213, + "loss": 3.2272, + "step": 2898 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005768628004523678, + "loss": 3.2949, + "step": 2899 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005766047475571098, + "loss": 3.3118, + "step": 2900 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005763466737648415, + "loss": 3.2878, + "step": 2901 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005760885791459627, + "loss": 3.2965, + "step": 2902 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005758304637708791, + "loss": 3.1834, + "step": 2903 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005755723277100021, + "loss": 3.2982, + "step": 2904 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005753141710337486, + "loss": 3.2503, + "step": 2905 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005750559938125411, + "loss": 3.2491, + "step": 2906 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005747977961168078, + "loss": 3.2223, + "step": 2907 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005745395780169826, + "loss": 3.1516, + "step": 2908 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005742813395835045, + "loss": 3.339, + "step": 2909 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005740230808868185, + "loss": 3.105, + "step": 2910 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005737648019973753, + "loss": 3.2541, + "step": 2911 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005735065029856304, + "loss": 3.4053, + "step": 2912 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005732481839220453, + "loss": 3.1824, + "step": 2913 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005729898448770869, + "loss": 3.2087, + "step": 2914 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005727314859212275, + "loss": 3.1342, + "step": 2915 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005724731071249449, + "loss": 3.0929, + "step": 2916 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005722147085587223, + "loss": 3.2516, + "step": 2917 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005719562902930479, + "loss": 3.4588, + "step": 2918 + }, + { + "epoch": 0.47, + "learning_rate": 0.000571697852398416, + "loss": 3.0319, + "step": 2919 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005714393949453258, + "loss": 3.1345, + "step": 2920 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005711809180042819, + "loss": 3.2252, + "step": 2921 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005709224216457941, + "loss": 3.3952, + "step": 2922 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005706639059403778, + "loss": 3.0421, + "step": 2923 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005704053709585533, + "loss": 3.3343, + "step": 2924 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005701468167708467, + "loss": 3.2704, + "step": 2925 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005698882434477887, + "loss": 3.1814, + "step": 2926 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005696296510599156, + "loss": 3.289, + "step": 2927 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005693710396777687, + "loss": 3.3108, + "step": 2928 + }, + { + "epoch": 0.47, + "learning_rate": 0.000569112409371895, + "loss": 3.0992, + "step": 2929 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005688537602128458, + "loss": 3.159, + "step": 2930 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005685950922711782, + "loss": 3.1889, + "step": 2931 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005683364056174545, + "loss": 3.2205, + "step": 2932 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005680777003222413, + "loss": 3.3713, + "step": 2933 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005678189764561113, + "loss": 3.2425, + "step": 2934 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005675602340896415, + "loss": 3.3798, + "step": 2935 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005673014732934143, + "loss": 3.4012, + "step": 2936 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005670426941380172, + "loss": 3.2409, + "step": 2937 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005667838966940423, + "loss": 3.2229, + "step": 2938 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005665250810320871, + "loss": 3.2976, + "step": 2939 + }, + { + "epoch": 0.47, + "learning_rate": 0.000566266247222754, + "loss": 3.1528, + "step": 2940 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005660073953366503, + "loss": 3.188, + "step": 2941 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005657485254443881, + "loss": 3.3084, + "step": 2942 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005654896376165846, + "loss": 3.1222, + "step": 2943 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005652307319238617, + "loss": 3.1588, + "step": 2944 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005649718084368463, + "loss": 3.2015, + "step": 2945 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005647128672261702, + "loss": 3.1567, + "step": 2946 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005644539083624701, + "loss": 3.1609, + "step": 2947 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005641949319163872, + "loss": 3.1626, + "step": 2948 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005639359379585679, + "loss": 3.0694, + "step": 2949 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005636769265596628, + "loss": 3.2203, + "step": 2950 + }, + { + "epoch": 0.48, + "learning_rate": 0.000563417897790328, + "loss": 3.3084, + "step": 2951 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005631588517212238, + "loss": 3.2316, + "step": 2952 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005628997884230154, + "loss": 3.1063, + "step": 2953 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005626407079663726, + "loss": 3.1869, + "step": 2954 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005623816104219701, + "loss": 3.227, + "step": 2955 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005621224958604868, + "loss": 3.2836, + "step": 2956 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005618633643526072, + "loss": 3.243, + "step": 2957 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005616042159690193, + "loss": 3.2233, + "step": 2958 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005613450507804161, + "loss": 3.1671, + "step": 2959 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005610858688574958, + "loss": 3.1712, + "step": 2960 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005608266702709602, + "loss": 3.2488, + "step": 2961 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005605674550915161, + "loss": 3.2241, + "step": 2962 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005603082233898751, + "loss": 3.163, + "step": 2963 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005600489752367528, + "loss": 3.2125, + "step": 2964 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005597897107028694, + "loss": 3.1831, + "step": 2965 + }, + { + "epoch": 0.48, + "learning_rate": 0.00055953042985895, + "loss": 3.1, + "step": 2966 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005592711327757236, + "loss": 2.9714, + "step": 2967 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005590118195239238, + "loss": 3.3674, + "step": 2968 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005587524901742889, + "loss": 3.3734, + "step": 2969 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005584931447975613, + "loss": 3.194, + "step": 2970 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005582337834644875, + "loss": 3.2746, + "step": 2971 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005579744062458191, + "loss": 3.1882, + "step": 2972 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005577150132123115, + "loss": 3.1259, + "step": 2973 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005574556044347245, + "loss": 3.2761, + "step": 2974 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005571961799838223, + "loss": 3.2351, + "step": 2975 + }, + { + "epoch": 0.48, + "learning_rate": 0.000556936739930373, + "loss": 3.1233, + "step": 2976 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005566772843451495, + "loss": 3.1026, + "step": 2977 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005564178132989288, + "loss": 3.3462, + "step": 2978 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005561583268624917, + "loss": 3.1032, + "step": 2979 + }, + { + "epoch": 0.48, + "learning_rate": 0.000555898825106624, + "loss": 3.2915, + "step": 2980 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005556393081021145, + "loss": 3.0523, + "step": 2981 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005553797759197574, + "loss": 3.2151, + "step": 2982 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005551202286303504, + "loss": 3.1518, + "step": 2983 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005548606663046951, + "loss": 3.2045, + "step": 2984 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005546010890135979, + "loss": 3.1733, + "step": 2985 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005543414968278687, + "loss": 3.2085, + "step": 2986 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005540818898183216, + "loss": 3.2113, + "step": 2987 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005538222680557749, + "loss": 3.0879, + "step": 2988 + }, + { + "epoch": 0.48, + "learning_rate": 0.000553562631611051, + "loss": 3.3749, + "step": 2989 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005533029805549756, + "loss": 3.232, + "step": 2990 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005530433149583795, + "loss": 3.3065, + "step": 2991 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005527836348920965, + "loss": 3.2166, + "step": 2992 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552523940426965, + "loss": 3.0705, + "step": 2993 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005522642316338268, + "loss": 3.3184, + "step": 2994 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552004508583528, + "loss": 3.216, + "step": 2995 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005517447713469185, + "loss": 3.2676, + "step": 2996 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005514850199948519, + "loss": 3.3614, + "step": 2997 + }, + { + "epoch": 0.48, + "learning_rate": 0.000551225254598186, + "loss": 3.2384, + "step": 2998 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005509654752277819, + "loss": 3.2557, + "step": 2999 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005507056819545049, + "loss": 3.3166, + "step": 3000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005504458748492243, + "loss": 3.1723, + "step": 3001 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005501860539828125, + "loss": 3.1236, + "step": 3002 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005499262194261462, + "loss": 3.2639, + "step": 3003 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005496663712501057, + "loss": 3.3128, + "step": 3004 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005494065095255751, + "loss": 3.2574, + "step": 3005 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005491466343234418, + "loss": 3.1444, + "step": 3006 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005488867457145974, + "loss": 3.2533, + "step": 3007 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005486268437699368, + "loss": 3.2704, + "step": 3008 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005483669285603585, + "loss": 3.2861, + "step": 3009 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005481070001567653, + "loss": 3.3433, + "step": 3010 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005478470586300625, + "loss": 3.2027, + "step": 3011 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005475871040511597, + "loss": 3.1678, + "step": 3012 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005473271364909702, + "loss": 3.3192, + "step": 3013 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005470671560204104, + "loss": 3.199, + "step": 3014 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005468071627104001, + "loss": 3.3145, + "step": 3015 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005465471566318633, + "loss": 3.195, + "step": 3016 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005462871378557267, + "loss": 3.218, + "step": 3017 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005460271064529211, + "loss": 3.1642, + "step": 3018 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005457670624943805, + "loss": 3.1151, + "step": 3019 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005455070060510419, + "loss": 3.3324, + "step": 3020 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005452469371938464, + "loss": 3.1659, + "step": 3021 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005449868559937385, + "loss": 3.1428, + "step": 3022 + }, + { + "epoch": 0.49, + "learning_rate": 0.000544726762521665, + "loss": 3.3396, + "step": 3023 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005444666568485774, + "loss": 3.1478, + "step": 3024 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005442065390454296, + "loss": 3.1821, + "step": 3025 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005439464091831795, + "loss": 3.0375, + "step": 3026 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005436862673327877, + "loss": 3.3832, + "step": 3027 + }, + { + "epoch": 0.49, + "learning_rate": 0.000543426113565218, + "loss": 3.2798, + "step": 3028 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005431659479514383, + "loss": 3.219, + "step": 3029 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005429057705624191, + "loss": 3.2747, + "step": 3030 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005426455814691337, + "loss": 3.165, + "step": 3031 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005423853807425596, + "loss": 3.0539, + "step": 3032 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005421251684536769, + "loss": 3.2583, + "step": 3033 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005418649446734684, + "loss": 3.1761, + "step": 3034 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005416047094729214, + "loss": 3.2972, + "step": 3035 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005413444629230248, + "loss": 3.1143, + "step": 3036 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005410842050947714, + "loss": 3.1442, + "step": 3037 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005408239360591572, + "loss": 3.3191, + "step": 3038 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005405636558871808, + "loss": 3.3667, + "step": 3039 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005403033646498439, + "loss": 3.2234, + "step": 3040 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005400430624181516, + "loss": 3.1733, + "step": 3041 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005397827492631116, + "loss": 3.104, + "step": 3042 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005395224252557347, + "loss": 3.1591, + "step": 3043 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005392620904670349, + "loss": 3.0896, + "step": 3044 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005390017449680287, + "loss": 3.3104, + "step": 3045 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005387413888297359, + "loss": 3.2191, + "step": 3046 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005384810221231789, + "loss": 3.2861, + "step": 3047 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005382206449193833, + "loss": 3.0633, + "step": 3048 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005379602572893774, + "loss": 3.2495, + "step": 3049 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005376998593041921, + "loss": 3.2491, + "step": 3050 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005374394510348617, + "loss": 3.1534, + "step": 3051 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005371790325524228, + "loss": 3.2734, + "step": 3052 + }, + { + "epoch": 0.49, + "learning_rate": 0.000536918603927915, + "loss": 3.2062, + "step": 3053 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005366581652323807, + "loss": 3.2644, + "step": 3054 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005363977165368649, + "loss": 3.188, + "step": 3055 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005361372579124152, + "loss": 3.1875, + "step": 3056 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005358767894300824, + "loss": 3.303, + "step": 3057 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005356163111609198, + "loss": 3.1102, + "step": 3058 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005353558231759828, + "loss": 3.2855, + "step": 3059 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005350953255463304, + "loss": 3.2437, + "step": 3060 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005348348183430236, + "loss": 3.3033, + "step": 3061 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005345743016371261, + "loss": 3.2723, + "step": 3062 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005343137754997042, + "loss": 3.1542, + "step": 3063 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005340532400018269, + "loss": 3.2267, + "step": 3064 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005337926952145658, + "loss": 3.3288, + "step": 3065 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005335321412089947, + "loss": 3.2769, + "step": 3066 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005332715780561904, + "loss": 3.1983, + "step": 3067 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005330110058272316, + "loss": 3.356, + "step": 3068 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005327504245932, + "loss": 3.3256, + "step": 3069 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005324898344251796, + "loss": 3.2436, + "step": 3070 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005322292353942568, + "loss": 3.1418, + "step": 3071 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005319686275715202, + "loss": 3.3643, + "step": 3072 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005317080110280613, + "loss": 3.0293, + "step": 3073 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005314473858349733, + "loss": 3.2588, + "step": 3074 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005311867520633526, + "loss": 3.2127, + "step": 3075 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005309261097842974, + "loss": 3.1018, + "step": 3076 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005306654590689079, + "loss": 3.3698, + "step": 3077 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005304047999882874, + "loss": 3.2209, + "step": 3078 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005301441326135412, + "loss": 3.3358, + "step": 3079 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005298834570157763, + "loss": 3.1811, + "step": 3080 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005296227732661028, + "loss": 3.3593, + "step": 3081 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005293620814356326, + "loss": 3.1968, + "step": 3082 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005291013815954795, + "loss": 3.2258, + "step": 3083 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005288406738167601, + "loss": 3.3108, + "step": 3084 + }, + { + "epoch": 0.5, + "learning_rate": 0.000528579958170593, + "loss": 3.1465, + "step": 3085 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005283192347280984, + "loss": 3.1123, + "step": 3086 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005280585035603993, + "loss": 3.3332, + "step": 3087 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005277977647386207, + "loss": 3.1474, + "step": 3088 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005275370183338893, + "loss": 3.1415, + "step": 3089 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005272762644173342, + "loss": 3.1128, + "step": 3090 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005270155030600866, + "loss": 3.105, + "step": 3091 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005267547343332793, + "loss": 3.1629, + "step": 3092 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005264939583080477, + "loss": 3.2143, + "step": 3093 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005262331750555287, + "loss": 3.2167, + "step": 3094 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005259723846468615, + "loss": 3.3578, + "step": 3095 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005257115871531873, + "loss": 3.2177, + "step": 3096 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005254507826456483, + "loss": 3.1446, + "step": 3097 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005251899711953903, + "loss": 3.2223, + "step": 3098 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005249291528735594, + "loss": 3.1324, + "step": 3099 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005246683277513044, + "loss": 3.2095, + "step": 3100 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005244074958997762, + "loss": 3.2255, + "step": 3101 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005241466573901267, + "loss": 3.2232, + "step": 3102 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005238858122935099, + "loss": 3.2391, + "step": 3103 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005236249606810821, + "loss": 3.1954, + "step": 3104 + }, + { + "epoch": 0.5, + "learning_rate": 0.000523364102624001, + "loss": 3.3927, + "step": 3105 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005231032381934256, + "loss": 3.2091, + "step": 3106 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005228423674605179, + "loss": 3.1559, + "step": 3107 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005225814904964402, + "loss": 3.2105, + "step": 3108 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005223206073723573, + "loss": 3.3645, + "step": 3109 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005220597181594356, + "loss": 3.1628, + "step": 3110 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005217988229288431, + "loss": 3.1492, + "step": 3111 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005215379217517492, + "loss": 3.1273, + "step": 3112 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005212770146993254, + "loss": 3.1202, + "step": 3113 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005210161018427443, + "loss": 3.1059, + "step": 3114 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005207551832531803, + "loss": 3.2784, + "step": 3115 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005204942590018098, + "loss": 3.3378, + "step": 3116 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005202333291598099, + "loss": 3.1373, + "step": 3117 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005199723937983599, + "loss": 3.1663, + "step": 3118 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005197114529886402, + "loss": 3.3453, + "step": 3119 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005194505068018328, + "loss": 3.0295, + "step": 3120 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005191895553091216, + "loss": 3.4432, + "step": 3121 + }, + { + "epoch": 0.5, + "learning_rate": 0.000518928598581691, + "loss": 3.2766, + "step": 3122 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005186676366907278, + "loss": 3.105, + "step": 3123 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005184066697074197, + "loss": 3.1081, + "step": 3124 + }, + { + "epoch": 0.5, + "learning_rate": 0.000518145697702956, + "loss": 3.3497, + "step": 3125 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005178847207485268, + "loss": 3.2287, + "step": 3126 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005176237389153246, + "loss": 3.159, + "step": 3127 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005173627522745422, + "loss": 3.2109, + "step": 3128 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005171017608973744, + "loss": 3.1443, + "step": 3129 + }, + { + "epoch": 0.5, + "learning_rate": 0.000516840764855017, + "loss": 3.2476, + "step": 3130 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005165797642186671, + "loss": 3.0899, + "step": 3131 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005163187590595229, + "loss": 3.2179, + "step": 3132 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005160577494487843, + "loss": 3.2804, + "step": 3133 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005157967354576518, + "loss": 3.3433, + "step": 3134 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005155357171573276, + "loss": 3.3916, + "step": 3135 + }, + { + "epoch": 0.51, + "learning_rate": 0.000515274694619015, + "loss": 3.2238, + "step": 3136 + }, + { + "epoch": 0.51, + "learning_rate": 0.000515013667913918, + "loss": 3.1708, + "step": 3137 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005147526371132424, + "loss": 3.1294, + "step": 3138 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005144916022881949, + "loss": 3.2582, + "step": 3139 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005142305635099826, + "loss": 3.2925, + "step": 3140 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005139695208498147, + "loss": 3.245, + "step": 3141 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005137084743789013, + "loss": 3.1815, + "step": 3142 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005134474241684526, + "loss": 3.3845, + "step": 3143 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005131863702896809, + "loss": 3.1579, + "step": 3144 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005129253128137992, + "loss": 3.1075, + "step": 3145 + }, + { + "epoch": 0.51, + "learning_rate": 0.000512664251812021, + "loss": 3.1062, + "step": 3146 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005124031873555613, + "loss": 3.1656, + "step": 3147 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005121421195156361, + "loss": 3.0344, + "step": 3148 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005118810483634619, + "loss": 3.2101, + "step": 3149 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005116199739702563, + "loss": 3.24, + "step": 3150 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005113588964072378, + "loss": 3.1664, + "step": 3151 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005110978157456257, + "loss": 3.3337, + "step": 3152 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005108367320566403, + "loss": 3.2461, + "step": 3153 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005105756454115027, + "loss": 3.0171, + "step": 3154 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005103145558814344, + "loss": 3.1236, + "step": 3155 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005100534635376583, + "loss": 3.1632, + "step": 3156 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005097923684513979, + "loss": 3.1155, + "step": 3157 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005095312706938768, + "loss": 3.1794, + "step": 3158 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005092701703363205, + "loss": 3.209, + "step": 3159 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005090090674499542, + "loss": 3.1605, + "step": 3160 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005087479621060042, + "loss": 3.1132, + "step": 3161 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005084868543756974, + "loss": 3.2622, + "step": 3162 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005082257443302615, + "loss": 3.0867, + "step": 3163 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005079646320409249, + "loss": 3.05, + "step": 3164 + }, + { + "epoch": 0.51, + "learning_rate": 0.000507703517578916, + "loss": 3.3022, + "step": 3165 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005074424010154643, + "loss": 3.3712, + "step": 3166 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005071812824218001, + "loss": 3.2997, + "step": 3167 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005069201618691537, + "loss": 3.2909, + "step": 3168 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005066590394287562, + "loss": 3.3085, + "step": 3169 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005063979151718393, + "loss": 3.1279, + "step": 3170 + }, + { + "epoch": 0.51, + "learning_rate": 0.000506136789169635, + "loss": 3.277, + "step": 3171 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005058756614933757, + "loss": 3.3897, + "step": 3172 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005056145322142948, + "loss": 3.1298, + "step": 3173 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005053534014036256, + "loss": 3.197, + "step": 3174 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005050922691326017, + "loss": 3.2081, + "step": 3175 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005048311354724578, + "loss": 3.2495, + "step": 3176 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005045700004944283, + "loss": 3.273, + "step": 3177 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005043088642697482, + "loss": 3.2238, + "step": 3178 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005040477268696532, + "loss": 3.1562, + "step": 3179 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005037865883653785, + "loss": 3.342, + "step": 3180 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005035254488281604, + "loss": 3.3034, + "step": 3181 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005032643083292351, + "loss": 3.2256, + "step": 3182 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005030031669398389, + "loss": 3.2956, + "step": 3183 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005027420247312089, + "loss": 3.1793, + "step": 3184 + }, + { + "epoch": 0.51, + "learning_rate": 0.000502480881774582, + "loss": 3.1414, + "step": 3185 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005022197381411951, + "loss": 3.1477, + "step": 3186 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005019585939022859, + "loss": 3.2471, + "step": 3187 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005016974491290918, + "loss": 3.1409, + "step": 3188 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005014363038928506, + "loss": 3.3576, + "step": 3189 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005011751582648, + "loss": 3.0862, + "step": 3190 + }, + { + "epoch": 0.51, + "learning_rate": 0.000500914012316178, + "loss": 3.2099, + "step": 3191 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005006528661182225, + "loss": 3.2117, + "step": 3192 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005003917197421717, + "loss": 3.1058, + "step": 3193 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005001305732592636, + "loss": 3.3008, + "step": 3194 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004998694267407364, + "loss": 3.1614, + "step": 3195 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004996082802578285, + "loss": 3.2821, + "step": 3196 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004993471338817775, + "loss": 3.2097, + "step": 3197 + }, + { + "epoch": 0.52, + "learning_rate": 0.000499085987683822, + "loss": 3.3142, + "step": 3198 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004988248417352, + "loss": 3.237, + "step": 3199 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004985636961071495, + "loss": 2.9857, + "step": 3200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004983025508709082, + "loss": 3.1696, + "step": 3201 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004980414060977142, + "loss": 3.2288, + "step": 3202 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004977802618588049, + "loss": 3.1491, + "step": 3203 + }, + { + "epoch": 0.52, + "learning_rate": 0.000497519118225418, + "loss": 3.1794, + "step": 3204 + }, + { + "epoch": 0.52, + "learning_rate": 0.000497257975268791, + "loss": 3.365, + "step": 3205 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004969968330601611, + "loss": 3.1326, + "step": 3206 + }, + { + "epoch": 0.52, + "learning_rate": 0.000496735691670765, + "loss": 3.2382, + "step": 3207 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004964745511718397, + "loss": 3.1295, + "step": 3208 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004962134116346215, + "loss": 3.1542, + "step": 3209 + }, + { + "epoch": 0.52, + "learning_rate": 0.000495952273130347, + "loss": 3.1851, + "step": 3210 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004956911357302517, + "loss": 3.2107, + "step": 3211 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004954299995055719, + "loss": 3.1284, + "step": 3212 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004951688645275423, + "loss": 3.0539, + "step": 3213 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004949077308673984, + "loss": 3.0944, + "step": 3214 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004946465985963746, + "loss": 3.2093, + "step": 3215 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004943854677857054, + "loss": 3.1784, + "step": 3216 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004941243385066243, + "loss": 3.186, + "step": 3217 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004938632108303651, + "loss": 3.1289, + "step": 3218 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004936020848281609, + "loss": 3.252, + "step": 3219 + }, + { + "epoch": 0.52, + "learning_rate": 0.000493340960571244, + "loss": 3.3504, + "step": 3220 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004930798381308464, + "loss": 3.3246, + "step": 3221 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004928187175782001, + "loss": 3.3063, + "step": 3222 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004925575989845356, + "loss": 3.2332, + "step": 3223 + }, + { + "epoch": 0.52, + "learning_rate": 0.000492296482421084, + "loss": 3.1433, + "step": 3224 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004920353679590753, + "loss": 3.2586, + "step": 3225 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004917742556697385, + "loss": 3.163, + "step": 3226 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004915131456243026, + "loss": 3.1047, + "step": 3227 + }, + { + "epoch": 0.52, + "learning_rate": 0.000491252037893996, + "loss": 3.2494, + "step": 3228 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004909909325500459, + "loss": 3.1794, + "step": 3229 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004907298296636795, + "loss": 3.0601, + "step": 3230 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004904687293061232, + "loss": 3.3469, + "step": 3231 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004902076315486023, + "loss": 3.2351, + "step": 3232 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004899465364623418, + "loss": 3.3783, + "step": 3233 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004896854441185658, + "loss": 3.1779, + "step": 3234 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004894243545884976, + "loss": 3.1827, + "step": 3235 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004891632679433597, + "loss": 3.1433, + "step": 3236 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004889021842543744, + "loss": 3.2206, + "step": 3237 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004886411035927622, + "loss": 3.2492, + "step": 3238 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004883800260297439, + "loss": 3.3205, + "step": 3239 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004881189516365383, + "loss": 3.1091, + "step": 3240 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048785788048436404, + "loss": 3.2434, + "step": 3241 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048759681264443864, + "loss": 3.2045, + "step": 3242 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004873357481879791, + "loss": 3.1103, + "step": 3243 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048707468718620095, + "loss": 3.2026, + "step": 3244 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004868136297103192, + "loss": 3.1947, + "step": 3245 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048655257583154755, + "loss": 3.2089, + "step": 3246 + }, + { + "epoch": 0.52, + "learning_rate": 0.000486291525621099, + "loss": 3.1493, + "step": 3247 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004860304791501852, + "loss": 3.1183, + "step": 3248 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048576943649001746, + "loss": 3.1506, + "step": 3249 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048550839771180533, + "loss": 3.209, + "step": 3250 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048524736288675766, + "loss": 3.0394, + "step": 3251 + }, + { + "epoch": 0.52, + "learning_rate": 0.000484986332086082, + "loss": 3.1961, + "step": 3252 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004847253053809852, + "loss": 3.0844, + "step": 3253 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048446428284267234, + "loss": 3.2307, + "step": 3254 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004842032645423483, + "loss": 3.0909, + "step": 3255 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004839422505512158, + "loss": 3.2183, + "step": 3256 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004836812409404772, + "loss": 3.2456, + "step": 3257 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004834202357813331, + "loss": 3.1626, + "step": 3258 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048315923514498317, + "loss": 2.9725, + "step": 3259 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004828982391026258, + "loss": 3.2855, + "step": 3260 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004826372477254579, + "loss": 3.2049, + "step": 3261 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004823762610846755, + "loss": 3.1008, + "step": 3262 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048211527925147326, + "loss": 3.1315, + "step": 3263 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004818543022970442, + "loss": 3.228, + "step": 3264 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004815933302925804, + "loss": 3.3829, + "step": 3265 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048133236330927227, + "loss": 3.1658, + "step": 3266 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048107140141830893, + "loss": 3.0786, + "step": 3267 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048081044469087855, + "loss": 3.2617, + "step": 3268 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004805494931981672, + "loss": 3.2626, + "step": 3269 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048028854701135987, + "loss": 3.2426, + "step": 3270 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048002760620164027, + "loss": 3.3305, + "step": 3271 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047976667084019016, + "loss": 3.1159, + "step": 3272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047950574099819013, + "loss": 3.3174, + "step": 3273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047924481674681957, + "loss": 3.2665, + "step": 3274 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004789838981572558, + "loss": 3.153, + "step": 3275 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004787229853006747, + "loss": 3.1022, + "step": 3276 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004784620782482509, + "loss": 3.1786, + "step": 3277 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047820117707115704, + "loss": 2.9901, + "step": 3278 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047794028184056434, + "loss": 3.2805, + "step": 3279 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047767939262764264, + "loss": 3.2803, + "step": 3280 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004774185095035599, + "loss": 3.3162, + "step": 3281 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004771576325394822, + "loss": 3.2557, + "step": 3282 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047689676180657435, + "loss": 3.1711, + "step": 3283 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047663589737599924, + "loss": 3.1379, + "step": 3284 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004763750393189181, + "loss": 3.2339, + "step": 3285 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047611418770649014, + "loss": 3.2799, + "step": 3286 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047585334260987345, + "loss": 3.3689, + "step": 3287 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004755925041002239, + "loss": 3.2719, + "step": 3288 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047533167224869563, + "loss": 3.2448, + "step": 3289 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047507084712644074, + "loss": 3.1812, + "step": 3290 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047481002880461, + "loss": 3.1863, + "step": 3291 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004745492173543517, + "loss": 3.3076, + "step": 3292 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004742884128468129, + "loss": 3.3589, + "step": 3293 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047402761535313853, + "loss": 3.1985, + "step": 3294 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004737668249444714, + "loss": 3.1538, + "step": 3295 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047350604169195236, + "loss": 3.3158, + "step": 3296 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047324526566672084, + "loss": 3.2434, + "step": 3297 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047298449693991345, + "loss": 3.1582, + "step": 3298 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004727237355826657, + "loss": 3.227, + "step": 3299 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047246298166611075, + "loss": 3.1504, + "step": 3300 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047220223526137935, + "loss": 3.112, + "step": 3301 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004719414964396007, + "loss": 3.3051, + "step": 3302 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047168076527190177, + "loss": 3.1122, + "step": 3303 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047142004182940714, + "loss": 3.2138, + "step": 3304 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047115932618323984, + "loss": 3.1657, + "step": 3305 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047089861840452055, + "loss": 3.1503, + "step": 3306 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004706379185643675, + "loss": 3.3249, + "step": 3307 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047037722673389724, + "loss": 3.0006, + "step": 3308 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047011654298422385, + "loss": 3.2079, + "step": 3309 + }, + { + "epoch": 0.53, + "learning_rate": 0.000469855867386459, + "loss": 3.2883, + "step": 3310 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004695952000117125, + "loss": 3.0878, + "step": 3311 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004693345409310922, + "loss": 3.2655, + "step": 3312 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004690738902157028, + "loss": 3.3669, + "step": 3313 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004688132479366475, + "loss": 3.0848, + "step": 3314 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004685526141650267, + "loss": 3.0766, + "step": 3315 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004682919889719389, + "loss": 3.1671, + "step": 3316 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004680313724284798, + "loss": 3.1254, + "step": 3317 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046777076460574336, + "loss": 3.1751, + "step": 3318 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004675101655748204, + "loss": 3.2538, + "step": 3319 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004672495754068001, + "loss": 3.1657, + "step": 3320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046698899417276846, + "loss": 3.3485, + "step": 3321 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046672842194380983, + "loss": 3.251, + "step": 3322 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004664678587910053, + "loss": 3.2865, + "step": 3323 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004662073047854343, + "loss": 3.2871, + "step": 3324 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046594675999817313, + "loss": 3.3369, + "step": 3325 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046568622450029594, + "loss": 3.1117, + "step": 3326 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004654256983628741, + "loss": 3.2471, + "step": 3327 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004651651816569766, + "loss": 3.2726, + "step": 3328 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004649046744536696, + "loss": 3.2909, + "step": 3329 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046464417682401724, + "loss": 3.2507, + "step": 3330 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046438368883908036, + "loss": 3.2136, + "step": 3331 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046412321056991766, + "loss": 3.2698, + "step": 3332 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004638627420875849, + "loss": 3.2456, + "step": 3333 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046360228346313537, + "loss": 3.1461, + "step": 3334 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004633418347676195, + "loss": 3.2197, + "step": 3335 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004630813960720849, + "loss": 3.2401, + "step": 3336 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046282096744757725, + "loss": 2.9968, + "step": 3337 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004625605489651384, + "loss": 3.1799, + "step": 3338 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046230014069580794, + "loss": 3.1964, + "step": 3339 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004620397427106229, + "loss": 3.3756, + "step": 3340 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046177935508061687, + "loss": 3.2419, + "step": 3341 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004615189778768211, + "loss": 3.3482, + "step": 3342 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004612586111702642, + "loss": 3.2245, + "step": 3343 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046099825503197146, + "loss": 3.2362, + "step": 3344 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046073790953296524, + "loss": 3.0774, + "step": 3345 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046047757474426546, + "loss": 3.0815, + "step": 3346 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046021725073688857, + "loss": 3.2171, + "step": 3347 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045995693758184845, + "loss": 3.2208, + "step": 3348 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045969663535015616, + "loss": 3.1965, + "step": 3349 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045943634411281926, + "loss": 3.1778, + "step": 3350 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004591760639408429, + "loss": 3.3007, + "step": 3351 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045891579490522867, + "loss": 3.0539, + "step": 3352 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004586555370769753, + "loss": 3.2757, + "step": 3353 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045839529052707864, + "loss": 3.2285, + "step": 3354 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004581350553265315, + "loss": 3.2487, + "step": 3355 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004578748315463232, + "loss": 3.2526, + "step": 3356 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004576146192574405, + "loss": 3.1446, + "step": 3357 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004573544185308664, + "loss": 3.1207, + "step": 3358 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004570942294375811, + "loss": 3.1406, + "step": 3359 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045683405204856184, + "loss": 3.0804, + "step": 3360 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004565738864347819, + "loss": 3.2368, + "step": 3361 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045631373266721246, + "loss": 3.1443, + "step": 3362 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045605359081682064, + "loss": 3.2279, + "step": 3363 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004557934609545705, + "loss": 3.1636, + "step": 3364 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045553334315142274, + "loss": 3.061, + "step": 3365 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045527323747833514, + "loss": 3.1988, + "step": 3366 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004550131440062617, + "loss": 3.2118, + "step": 3367 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045475306280615347, + "loss": 3.173, + "step": 3368 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045449299394895816, + "loss": 3.0572, + "step": 3369 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004542329375056197, + "loss": 3.0886, + "step": 3370 + }, + { + "epoch": 0.54, + "learning_rate": 0.000453972893547079, + "loss": 3.2411, + "step": 3371 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045371286214427345, + "loss": 3.3076, + "step": 3372 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004534528433681368, + "loss": 3.3572, + "step": 3373 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045319283728959984, + "loss": 3.2711, + "step": 3374 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004529328439795898, + "loss": 3.2829, + "step": 3375 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004526728635090298, + "loss": 3.2396, + "step": 3376 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004524128959488403, + "loss": 3.318, + "step": 3377 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004521529413699377, + "loss": 3.3001, + "step": 3378 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045189299984323486, + "loss": 3.1894, + "step": 3379 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004516330714396414, + "loss": 3.2905, + "step": 3380 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045137315623006337, + "loss": 3.1209, + "step": 3381 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004511132542854027, + "loss": 3.3547, + "step": 3382 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045085336567655834, + "loss": 3.2633, + "step": 3383 + }, + { + "epoch": 0.55, + "learning_rate": 0.000450593490474425, + "loss": 3.1031, + "step": 3384 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045033362874989435, + "loss": 3.1281, + "step": 3385 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045007378057385376, + "loss": 3.2569, + "step": 3386 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004498139460171876, + "loss": 3.1097, + "step": 3387 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004495541251507758, + "loss": 3.2054, + "step": 3388 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044929431804549514, + "loss": 3.1308, + "step": 3389 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044903452477221826, + "loss": 2.993, + "step": 3390 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044877474540181426, + "loss": 3.2576, + "step": 3391 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044851498000514806, + "loss": 3.1196, + "step": 3392 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004482552286530816, + "loss": 3.2188, + "step": 3393 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044799549141647205, + "loss": 3.2312, + "step": 3394 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044773576836617336, + "loss": 3.0987, + "step": 3395 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004474760595730352, + "loss": 3.0337, + "step": 3396 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044721636510790366, + "loss": 3.3296, + "step": 3397 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004469566850416206, + "loss": 3.1772, + "step": 3398 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044669701944502444, + "loss": 3.1926, + "step": 3399 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004464373683889492, + "loss": 3.2088, + "step": 3400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004461777319442252, + "loss": 3.148, + "step": 3401 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004459181101816785, + "loss": 3.1503, + "step": 3402 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044565850317213155, + "loss": 3.0292, + "step": 3403 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044539891098640215, + "loss": 3.2398, + "step": 3404 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044513933369530484, + "loss": 3.1354, + "step": 3405 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044487977136964976, + "loss": 3.2474, + "step": 3406 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004446202240802427, + "loss": 3.2825, + "step": 3407 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044436069189788554, + "loss": 3.2114, + "step": 3408 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044410117489337633, + "loss": 3.1577, + "step": 3409 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044384167313750835, + "loss": 3.1737, + "step": 3410 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004435821867010712, + "loss": 3.1093, + "step": 3411 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004433227156548505, + "loss": 3.207, + "step": 3412 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004430632600696272, + "loss": 3.2113, + "step": 3413 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004428038200161779, + "loss": 3.0631, + "step": 3414 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044254439556527564, + "loss": 3.2515, + "step": 3415 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004422849867876886, + "loss": 3.2185, + "step": 3416 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004420255937541808, + "loss": 3.1429, + "step": 3417 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044176621653551246, + "loss": 3.4016, + "step": 3418 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044150685520243885, + "loss": 3.1982, + "step": 3419 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044124750982571113, + "loss": 3.273, + "step": 3420 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004409881804760763, + "loss": 3.1384, + "step": 3421 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004407288672242766, + "loss": 3.1849, + "step": 3422 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044046957014105007, + "loss": 3.0956, + "step": 3423 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044021028929713065, + "loss": 3.4236, + "step": 3424 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043995102476324734, + "loss": 3.1925, + "step": 3425 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043969177661012503, + "loss": 3.1946, + "step": 3426 + }, + { + "epoch": 0.55, + "learning_rate": 0.000439432544908484, + "loss": 3.2926, + "step": 3427 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043917332972904, + "loss": 3.1886, + "step": 3428 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043891413114250424, + "loss": 3.2028, + "step": 3429 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004386549492195838, + "loss": 3.2794, + "step": 3430 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043839578403098077, + "loss": 3.2413, + "step": 3431 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004381366356473929, + "loss": 3.2096, + "step": 3432 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004378775041395132, + "loss": 3.2268, + "step": 3433 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004376183895780301, + "loss": 3.095, + "step": 3434 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043735929203362765, + "loss": 3.1742, + "step": 3435 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043710021157698477, + "loss": 3.2431, + "step": 3436 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004368411482787763, + "loss": 3.1533, + "step": 3437 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043658210220967207, + "loss": 3.477, + "step": 3438 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004363230734403372, + "loss": 3.2855, + "step": 3439 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043606406204143227, + "loss": 3.2613, + "step": 3440 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043580506808361296, + "loss": 3.1322, + "step": 3441 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004355460916375299, + "loss": 3.2311, + "step": 3442 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004352871327738297, + "loss": 3.2778, + "step": 3443 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043502819156315375, + "loss": 3.1577, + "step": 3444 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004347692680761384, + "loss": 3.2708, + "step": 3445 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004345103623834156, + "loss": 3.2704, + "step": 3446 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043425147455561206, + "loss": 3.1493, + "step": 3447 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043399260466334973, + "loss": 3.2697, + "step": 3448 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004337337527772459, + "loss": 3.1715, + "step": 3449 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043347491896791293, + "loss": 3.2397, + "step": 3450 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004332161033059578, + "loss": 3.1042, + "step": 3451 + }, + { + "epoch": 0.56, + "learning_rate": 0.000432957305861983, + "loss": 3.2512, + "step": 3452 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043269852670658576, + "loss": 3.2441, + "step": 3453 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004324397659103586, + "loss": 3.0104, + "step": 3454 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043218102354388875, + "loss": 3.2199, + "step": 3455 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043192229967775874, + "loss": 3.3412, + "step": 3456 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004316635943825456, + "loss": 3.1961, + "step": 3457 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004314049077288218, + "loss": 3.0986, + "step": 3458 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043114623978715425, + "loss": 3.0902, + "step": 3459 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043088759062810515, + "loss": 3.2653, + "step": 3460 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004306289603222312, + "loss": 2.9927, + "step": 3461 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004303703489400845, + "loss": 3.2053, + "step": 3462 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043011175655221134, + "loss": 3.1761, + "step": 3463 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004298531832291535, + "loss": 3.33, + "step": 3464 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004295946290414467, + "loss": 3.1031, + "step": 3465 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004293360940596224, + "loss": 3.2573, + "step": 3466 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042907757835420593, + "loss": 3.1434, + "step": 3467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042881908199571825, + "loss": 3.2677, + "step": 3468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004285606050546742, + "loss": 3.1098, + "step": 3469 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004283021476015841, + "loss": 3.1216, + "step": 3470 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004280437097069522, + "loss": 3.1876, + "step": 3471 + }, + { + "epoch": 0.56, + "learning_rate": 0.000427785291441278, + "loss": 3.3451, + "step": 3472 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004275268928750551, + "loss": 3.2029, + "step": 3473 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004272685140787724, + "loss": 3.254, + "step": 3474 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004270101551229131, + "loss": 3.1195, + "step": 3475 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004267518160779549, + "loss": 3.2315, + "step": 3476 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004264934970143697, + "loss": 3.1434, + "step": 3477 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004262351980026249, + "loss": 3.0753, + "step": 3478 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042597691911318134, + "loss": 3.1686, + "step": 3479 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004257186604164955, + "loss": 3.2603, + "step": 3480 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004254604219830175, + "loss": 3.2399, + "step": 3481 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042520220388319224, + "loss": 3.1753, + "step": 3482 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042494400618745893, + "loss": 3.0867, + "step": 3483 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004246858289662515, + "loss": 3.2331, + "step": 3484 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042442767228999804, + "loss": 3.3531, + "step": 3485 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042416953622912085, + "loss": 3.2081, + "step": 3486 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004239114208540374, + "loss": 3.0914, + "step": 3487 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042365332623515864, + "loss": 3.3112, + "step": 3488 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042339525244289034, + "loss": 3.1849, + "step": 3489 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004231371995476324, + "loss": 3.21, + "step": 3490 + }, + { + "epoch": 0.56, + "learning_rate": 0.000422879167619779, + "loss": 3.3596, + "step": 3491 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004226211567297186, + "loss": 3.0654, + "step": 3492 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042236316694783434, + "loss": 3.2633, + "step": 3493 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042210519834450297, + "loss": 3.1809, + "step": 3494 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042184725099009594, + "loss": 3.2025, + "step": 3495 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004215893249549786, + "loss": 3.285, + "step": 3496 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042133142030951044, + "loss": 3.2173, + "step": 3497 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004210735371240454, + "loss": 3.3562, + "step": 3498 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042081567546893166, + "loss": 3.2351, + "step": 3499 + }, + { + "epoch": 0.56, + "learning_rate": 0.000420557835414511, + "loss": 3.103, + "step": 3500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004203000170311199, + "loss": 3.204, + "step": 3501 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042004222038908846, + "loss": 3.0736, + "step": 3502 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004197844455587409, + "loss": 3.1707, + "step": 3503 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004195266926103959, + "loss": 3.2559, + "step": 3504 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004192689616143655, + "loss": 3.1687, + "step": 3505 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004190112526409565, + "loss": 3.216, + "step": 3506 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004187535657604694, + "loss": 3.3032, + "step": 3507 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041849590104319834, + "loss": 3.155, + "step": 3508 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004182382585594318, + "loss": 3.1065, + "step": 3509 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041798063837945214, + "loss": 3.2242, + "step": 3510 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041772304057353525, + "loss": 3.357, + "step": 3511 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004174654652119516, + "loss": 3.1096, + "step": 3512 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004172079123649652, + "loss": 3.1568, + "step": 3513 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004169503821028336, + "loss": 3.0949, + "step": 3514 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004166928744958088, + "loss": 3.0884, + "step": 3515 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004164353896141361, + "loss": 3.2861, + "step": 3516 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041617792752805463, + "loss": 3.0788, + "step": 3517 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004159204883077978, + "loss": 3.1022, + "step": 3518 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004156630720235925, + "loss": 3.1972, + "step": 3519 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041540567874565905, + "loss": 3.0318, + "step": 3520 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041514830854421205, + "loss": 3.191, + "step": 3521 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041489096148945913, + "loss": 3.1712, + "step": 3522 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004146336376516024, + "loss": 3.1524, + "step": 3523 + }, + { + "epoch": 0.57, + "learning_rate": 0.000414376337100837, + "loss": 3.3731, + "step": 3524 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004141190599073521, + "loss": 3.3135, + "step": 3525 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004138618061413302, + "loss": 3.2335, + "step": 3526 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004136045758729477, + "loss": 3.1564, + "step": 3527 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041334736917237426, + "loss": 3.1694, + "step": 3528 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041309018610977355, + "loss": 3.1674, + "step": 3529 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004128330267553022, + "loss": 3.1642, + "step": 3530 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004125758911791112, + "loss": 3.164, + "step": 3531 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041231877945134413, + "loss": 2.9476, + "step": 3532 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004120616916421387, + "loss": 3.1589, + "step": 3533 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041180462782162584, + "loss": 3.2706, + "step": 3534 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004115475880599301, + "loss": 3.2866, + "step": 3535 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041129057242716906, + "loss": 3.1801, + "step": 3536 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041103358099345446, + "loss": 3.2429, + "step": 3537 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041077661382889067, + "loss": 3.162, + "step": 3538 + }, + { + "epoch": 0.57, + "learning_rate": 0.000410519671003576, + "loss": 3.1663, + "step": 3539 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004102627525876016, + "loss": 3.2447, + "step": 3540 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041000585865105257, + "loss": 3.0776, + "step": 3541 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004097489892640066, + "loss": 3.3406, + "step": 3542 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004094921444965354, + "loss": 3.1378, + "step": 3543 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004092353244187038, + "loss": 3.0408, + "step": 3544 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004089785291005695, + "loss": 3.0257, + "step": 3545 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004087217586121836, + "loss": 3.1976, + "step": 3546 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004084650130235909, + "loss": 3.2189, + "step": 3547 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040820829240482856, + "loss": 3.2109, + "step": 3548 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040795159682592775, + "loss": 3.1772, + "step": 3549 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040769492635691246, + "loss": 3.2018, + "step": 3550 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040743828106779977, + "loss": 3.3782, + "step": 3551 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040718166102859975, + "loss": 3.0642, + "step": 3552 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040692506630931615, + "loss": 3.1237, + "step": 3553 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040666849697994505, + "loss": 3.0363, + "step": 3554 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004064119531104762, + "loss": 3.182, + "step": 3555 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040615543477089236, + "loss": 3.2085, + "step": 3556 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004058989420311689, + "loss": 3.1791, + "step": 3557 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004056424749612747, + "loss": 3.2958, + "step": 3558 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004053860336311714, + "loss": 3.2603, + "step": 3559 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004051296181108134, + "loss": 3.1953, + "step": 3560 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004048732284701483, + "loss": 3.2972, + "step": 3561 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004046168647791171, + "loss": 3.1484, + "step": 3562 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004043605271076527, + "loss": 3.24, + "step": 3563 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004041042155256819, + "loss": 3.1752, + "step": 3564 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040384793010312363, + "loss": 3.2012, + "step": 3565 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040359167090989005, + "loss": 3.3491, + "step": 3566 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040333543801588615, + "loss": 3.0724, + "step": 3567 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040307923149100977, + "loss": 3.06, + "step": 3568 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004028230514051514, + "loss": 3.3532, + "step": 3569 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040256689782819453, + "loss": 3.2151, + "step": 3570 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004023107708300153, + "loss": 3.1697, + "step": 3571 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004020546704804823, + "loss": 3.1717, + "step": 3572 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004017985968494573, + "loss": 3.042, + "step": 3573 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040154255000679484, + "loss": 3.146, + "step": 3574 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040128653002234176, + "loss": 3.4314, + "step": 3575 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004010305369659379, + "loss": 3.2727, + "step": 3576 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040077457090741537, + "loss": 3.1477, + "step": 3577 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040051863191659944, + "loss": 3.1486, + "step": 3578 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040026272006330745, + "loss": 3.2335, + "step": 3579 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040000683541734956, + "loss": 3.2524, + "step": 3580 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039975097804852876, + "loss": 3.2035, + "step": 3581 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003994951480266405, + "loss": 3.2949, + "step": 3582 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003992393454214723, + "loss": 3.1868, + "step": 3583 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039898357030280487, + "loss": 3.0569, + "step": 3584 + }, + { + "epoch": 0.58, + "learning_rate": 0.000398727822740411, + "loss": 3.1112, + "step": 3585 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039847210280405577, + "loss": 3.2703, + "step": 3586 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003982164105634974, + "loss": 3.1283, + "step": 3587 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003979607460884862, + "loss": 3.1639, + "step": 3588 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003977051094487647, + "loss": 3.3186, + "step": 3589 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039744950071406816, + "loss": 3.1536, + "step": 3590 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003971939199541239, + "loss": 3.0353, + "step": 3591 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039693836723865176, + "loss": 3.0424, + "step": 3592 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039668284263736443, + "loss": 3.1944, + "step": 3593 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039642734621996614, + "loss": 3.3091, + "step": 3594 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003961718780561537, + "loss": 3.2013, + "step": 3595 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003959164382156164, + "loss": 3.2532, + "step": 3596 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003956610267680356, + "loss": 3.0643, + "step": 3597 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039540564378308493, + "loss": 3.0848, + "step": 3598 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039515028933043064, + "loss": 3.2112, + "step": 3599 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039489496347973064, + "loss": 3.1576, + "step": 3600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003946396663006352, + "loss": 3.1201, + "step": 3601 + }, + { + "epoch": 0.58, + "learning_rate": 0.000394384397862787, + "loss": 3.2159, + "step": 3602 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039412915823582056, + "loss": 3.1391, + "step": 3603 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003938739474893629, + "loss": 3.1118, + "step": 3604 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039361876569303267, + "loss": 3.0487, + "step": 3605 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003933636129164413, + "loss": 3.1398, + "step": 3606 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003931084892291917, + "loss": 3.1341, + "step": 3607 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003928533947008791, + "loss": 3.162, + "step": 3608 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003925983294010907, + "loss": 3.2357, + "step": 3609 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039234329339940587, + "loss": 3.1344, + "step": 3610 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039208828676539557, + "loss": 3.2706, + "step": 3611 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003918333095686235, + "loss": 3.0659, + "step": 3612 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039157836187864474, + "loss": 3.3347, + "step": 3613 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039132344376500647, + "loss": 3.2705, + "step": 3614 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039106855529724764, + "loss": 3.1605, + "step": 3615 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039081369654489956, + "loss": 3.2678, + "step": 3616 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003905588675774848, + "loss": 3.1127, + "step": 3617 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003903040684645184, + "loss": 3.264, + "step": 3618 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039004929927550716, + "loss": 2.9704, + "step": 3619 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003897945600799493, + "loss": 3.1316, + "step": 3620 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003895398509473352, + "loss": 3.3078, + "step": 3621 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038928517194714707, + "loss": 3.4036, + "step": 3622 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038903052314885855, + "loss": 3.279, + "step": 3623 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003887759046219355, + "loss": 3.5028, + "step": 3624 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003885213164358354, + "loss": 3.0752, + "step": 3625 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003882667586600071, + "loss": 3.2094, + "step": 3626 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038801223136389167, + "loss": 3.2529, + "step": 3627 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038775773461692157, + "loss": 3.2987, + "step": 3628 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038750326848852073, + "loss": 3.1633, + "step": 3629 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038724883304810503, + "loss": 3.2604, + "step": 3630 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038699442836508227, + "loss": 3.1565, + "step": 3631 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038674005450885115, + "loss": 3.2225, + "step": 3632 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003864857115488025, + "loss": 3.3834, + "step": 3633 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038623139955431857, + "loss": 3.174, + "step": 3634 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038597711859477286, + "loss": 3.2362, + "step": 3635 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003857228687395308, + "loss": 3.2945, + "step": 3636 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003854686500579494, + "loss": 3.1396, + "step": 3637 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003852144626193768, + "loss": 3.1494, + "step": 3638 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038496030649315296, + "loss": 3.2615, + "step": 3639 + }, + { + "epoch": 0.59, + "learning_rate": 0.000384706181748609, + "loss": 3.0965, + "step": 3640 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003844520884550675, + "loss": 3.1378, + "step": 3641 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003841980266818426, + "loss": 3.1798, + "step": 3642 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003839439964982402, + "loss": 3.1281, + "step": 3643 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038368999797355676, + "loss": 3.1302, + "step": 3644 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003834360311770808, + "loss": 3.0452, + "step": 3645 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003831820961780917, + "loss": 3.1079, + "step": 3646 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003829281930458607, + "loss": 3.3207, + "step": 3647 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003826743218496496, + "loss": 3.2192, + "step": 3648 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038242048265871233, + "loss": 3.1293, + "step": 3649 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003821666755422935, + "loss": 3.024, + "step": 3650 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038191290056962927, + "loss": 3.1154, + "step": 3651 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003816591578099468, + "loss": 3.1187, + "step": 3652 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003814054473324647, + "loss": 3.1588, + "step": 3653 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038115176920639263, + "loss": 3.1915, + "step": 3654 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003808981235009311, + "loss": 3.1378, + "step": 3655 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038064451028527267, + "loss": 3.0514, + "step": 3656 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003803909296286002, + "loss": 3.156, + "step": 3657 + }, + { + "epoch": 0.59, + "learning_rate": 0.000380137381600088, + "loss": 3.3135, + "step": 3658 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003798838662689016, + "loss": 3.1641, + "step": 3659 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037963038370419715, + "loss": 3.106, + "step": 3660 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037937693397512224, + "loss": 3.0765, + "step": 3661 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003791235171508157, + "loss": 3.2622, + "step": 3662 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003788701333004069, + "loss": 3.1377, + "step": 3663 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003786167824930164, + "loss": 3.2103, + "step": 3664 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003783634647977559, + "loss": 3.2589, + "step": 3665 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037811018028372775, + "loss": 3.0918, + "step": 3666 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003778569290200254, + "loss": 3.219, + "step": 3667 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003776037110757336, + "loss": 3.2095, + "step": 3668 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003773505265199275, + "loss": 3.4172, + "step": 3669 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003770973754216732, + "loss": 3.2771, + "step": 3670 + }, + { + "epoch": 0.59, + "learning_rate": 0.000376844257850028, + "loss": 3.1861, + "step": 3671 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003765911738740397, + "loss": 3.2049, + "step": 3672 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037633812356274694, + "loss": 3.225, + "step": 3673 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037608510698517974, + "loss": 3.2639, + "step": 3674 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037583212421035824, + "loss": 3.1461, + "step": 3675 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003755791753072935, + "loss": 3.1747, + "step": 3676 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037532626034498775, + "loss": 3.2179, + "step": 3677 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037507337939243334, + "loss": 3.2245, + "step": 3678 + }, + { + "epoch": 0.59, + "learning_rate": 0.000374820532518614, + "loss": 3.1879, + "step": 3679 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037456771979250334, + "loss": 3.1053, + "step": 3680 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037431494128306673, + "loss": 3.2661, + "step": 3681 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003740621970592594, + "loss": 3.15, + "step": 3682 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037380948719002743, + "loss": 3.1301, + "step": 3683 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037355681174430745, + "loss": 3.2726, + "step": 3684 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037330417079102697, + "loss": 3.2972, + "step": 3685 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037305156439910363, + "loss": 3.2773, + "step": 3686 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003727989926374463, + "loss": 3.0526, + "step": 3687 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003725464555749539, + "loss": 3.1856, + "step": 3688 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037229395328051604, + "loss": 3.108, + "step": 3689 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003720414858230126, + "loss": 3.247, + "step": 3690 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003717890532713145, + "loss": 3.2252, + "step": 3691 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003715366556942825, + "loss": 3.23, + "step": 3692 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037128429316076844, + "loss": 3.2128, + "step": 3693 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003710319657396143, + "loss": 3.1424, + "step": 3694 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003707796734996522, + "loss": 3.1431, + "step": 3695 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037052741650970525, + "loss": 3.1507, + "step": 3696 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003702751948385864, + "loss": 3.0993, + "step": 3697 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003700230085550992, + "loss": 3.0506, + "step": 3698 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003697708577280377, + "loss": 3.1036, + "step": 3699 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003695187424261862, + "loss": 3.2103, + "step": 3700 + }, + { + "epoch": 0.6, + "learning_rate": 0.000369266662718319, + "loss": 3.1875, + "step": 3701 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003690146186732012, + "loss": 3.0278, + "step": 3702 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036876261035958765, + "loss": 3.2181, + "step": 3703 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036851063784622376, + "loss": 3.2134, + "step": 3704 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036825870120184496, + "loss": 3.0586, + "step": 3705 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036800680049517756, + "loss": 3.3617, + "step": 3706 + }, + { + "epoch": 0.6, + "learning_rate": 0.000367754935794937, + "loss": 3.1478, + "step": 3707 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036750310716982996, + "loss": 3.1093, + "step": 3708 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003672513146885523, + "loss": 3.1908, + "step": 3709 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003669995584197908, + "loss": 3.1753, + "step": 3710 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036674783843222177, + "loss": 3.3199, + "step": 3711 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003664961547945123, + "loss": 3.1907, + "step": 3712 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003662445075753189, + "loss": 3.0726, + "step": 3713 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003659928968432886, + "loss": 3.132, + "step": 3714 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036574132266705805, + "loss": 3.3089, + "step": 3715 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003654897851152544, + "loss": 3.2814, + "step": 3716 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003652382842564943, + "loss": 3.0118, + "step": 3717 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003649868201593851, + "loss": 3.133, + "step": 3718 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003647353928925234, + "loss": 3.2615, + "step": 3719 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036448400252449617, + "loss": 3.1365, + "step": 3720 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036423264912388, + "loss": 3.0687, + "step": 3721 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003639813327592419, + "loss": 3.2587, + "step": 3722 + }, + { + "epoch": 0.6, + "learning_rate": 0.000363730053499138, + "loss": 3.2239, + "step": 3723 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003634788114121154, + "loss": 3.183, + "step": 3724 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003632276065667098, + "loss": 3.0569, + "step": 3725 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036297643903144796, + "loss": 3.3382, + "step": 3726 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036272530887484535, + "loss": 3.4445, + "step": 3727 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036247421616540816, + "loss": 3.1898, + "step": 3728 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003622231609716317, + "loss": 3.3619, + "step": 3729 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036197214336200137, + "loss": 3.177, + "step": 3730 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003617211634049925, + "loss": 3.2446, + "step": 3731 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003614702211690698, + "loss": 3.1348, + "step": 3732 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003612193167226876, + "loss": 3.0741, + "step": 3733 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003609684501342905, + "loss": 3.167, + "step": 3734 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036071762147231196, + "loss": 3.0626, + "step": 3735 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036046683080517573, + "loss": 3.1588, + "step": 3736 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003602160782012952, + "loss": 3.1651, + "step": 3737 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003599653637290731, + "loss": 3.1758, + "step": 3738 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003597146874569015, + "loss": 3.2, + "step": 3739 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003594640494531628, + "loss": 3.1079, + "step": 3740 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003592134497862283, + "loss": 3.1873, + "step": 3741 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035896288852445903, + "loss": 3.1328, + "step": 3742 + }, + { + "epoch": 0.6, + "learning_rate": 0.000358712365736206, + "loss": 3.2796, + "step": 3743 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035846188148980896, + "loss": 3.2468, + "step": 3744 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003582114358535976, + "loss": 3.425, + "step": 3745 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003579610288958911, + "loss": 3.2191, + "step": 3746 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035771066068499767, + "loss": 3.1012, + "step": 3747 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003574603312892155, + "loss": 3.0041, + "step": 3748 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035721004077683206, + "loss": 3.1309, + "step": 3749 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035695978921612383, + "loss": 3.1496, + "step": 3750 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003567095766753572, + "loss": 3.2345, + "step": 3751 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003564594032227875, + "loss": 3.1334, + "step": 3752 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003562092689266595, + "loss": 3.1289, + "step": 3753 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035595917385520756, + "loss": 3.1095, + "step": 3754 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003557091180766547, + "loss": 3.0637, + "step": 3755 + }, + { + "epoch": 0.61, + "learning_rate": 0.000355459101659214, + "loss": 3.27, + "step": 3756 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035520912467108747, + "loss": 3.078, + "step": 3757 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035495918718046624, + "loss": 3.2856, + "step": 3758 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003547092892555306, + "loss": 3.1458, + "step": 3759 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035445943096445044, + "loss": 3.2951, + "step": 3760 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035420961237538437, + "loss": 3.348, + "step": 3761 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035395983355648067, + "loss": 3.2594, + "step": 3762 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035371009457587645, + "loss": 3.293, + "step": 3763 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035346039550169785, + "loss": 3.2117, + "step": 3764 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003532107364020605, + "loss": 3.1945, + "step": 3765 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035296111734506877, + "loss": 3.1235, + "step": 3766 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035271153839881605, + "loss": 3.2138, + "step": 3767 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035246199963138536, + "loss": 3.137, + "step": 3768 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035221250111084837, + "loss": 3.1597, + "step": 3769 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035196304290526545, + "loss": 3.1508, + "step": 3770 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035171362508268665, + "loss": 3.1114, + "step": 3771 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003514642477111505, + "loss": 3.0672, + "step": 3772 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003512149108586845, + "loss": 3.3381, + "step": 3773 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035096561459330557, + "loss": 3.1665, + "step": 3774 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035071635898301914, + "loss": 3.0854, + "step": 3775 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003504671440958195, + "loss": 3.2345, + "step": 3776 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003502179699996903, + "loss": 3.1385, + "step": 3777 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034996883676260347, + "loss": 3.0541, + "step": 3778 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003497197444525201, + "loss": 3.2339, + "step": 3779 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034947069313738993, + "loss": 3.3471, + "step": 3780 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003492216828851521, + "loss": 3.0803, + "step": 3781 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034897271376373385, + "loss": 3.2488, + "step": 3782 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003487237858410516, + "loss": 3.2869, + "step": 3783 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003484748991850102, + "loss": 3.1695, + "step": 3784 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003482260538635036, + "loss": 3.2834, + "step": 3785 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034797724994441414, + "loss": 2.9703, + "step": 3786 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003477284874956134, + "loss": 3.1104, + "step": 3787 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034747976658496097, + "loss": 3.1409, + "step": 3788 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034723108728030563, + "loss": 3.2163, + "step": 3789 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034698244964948433, + "loss": 3.2635, + "step": 3790 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034673385376032335, + "loss": 3.0444, + "step": 3791 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003464852996806367, + "loss": 3.0745, + "step": 3792 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034623678747822785, + "loss": 3.1054, + "step": 3793 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034598831722088826, + "loss": 3.29, + "step": 3794 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003457398889763982, + "loss": 3.0743, + "step": 3795 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034549150281252633, + "loss": 3.1037, + "step": 3796 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034524315879703006, + "loss": 3.1515, + "step": 3797 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034499485699765486, + "loss": 3.3224, + "step": 3798 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003447465974821352, + "loss": 3.1269, + "step": 3799 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034449838031819403, + "loss": 3.0513, + "step": 3800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003442502055735421, + "loss": 3.1829, + "step": 3801 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034400207331587924, + "loss": 3.1715, + "step": 3802 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003437539836128935, + "loss": 3.3458, + "step": 3803 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034350593653226095, + "loss": 3.2647, + "step": 3804 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003432579321416464, + "loss": 3.1644, + "step": 3805 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003430099705087034, + "loss": 3.1215, + "step": 3806 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003427620517010732, + "loss": 3.2238, + "step": 3807 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003425141757863854, + "loss": 3.2699, + "step": 3808 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003422663428322582, + "loss": 3.1745, + "step": 3809 + }, + { + "epoch": 0.61, + "learning_rate": 0.000342018552906298, + "loss": 3.1356, + "step": 3810 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003417708060760992, + "loss": 3.1268, + "step": 3811 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034152310240924503, + "loss": 3.1308, + "step": 3812 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034127544197330636, + "loss": 3.0954, + "step": 3813 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034102782483584235, + "loss": 3.2691, + "step": 3814 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003407802510644008, + "loss": 3.141, + "step": 3815 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003405327207265171, + "loss": 3.2359, + "step": 3816 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003402852338897151, + "loss": 3.1622, + "step": 3817 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003400377906215069, + "loss": 3.111, + "step": 3818 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003397903909893924, + "loss": 3.1455, + "step": 3819 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033954303506085985, + "loss": 3.1247, + "step": 3820 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003392957229033855, + "loss": 3.222, + "step": 3821 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003390484545844334, + "loss": 3.2638, + "step": 3822 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003388012301714559, + "loss": 3.1266, + "step": 3823 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033855404973189376, + "loss": 3.0327, + "step": 3824 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033830691333317496, + "loss": 3.3177, + "step": 3825 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033805982104271606, + "loss": 3.0392, + "step": 3826 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003378127729279212, + "loss": 3.1377, + "step": 3827 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003375657690561826, + "loss": 3.1744, + "step": 3828 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003373188094948807, + "loss": 3.2019, + "step": 3829 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033707189431138323, + "loss": 3.0339, + "step": 3830 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003368250235730466, + "loss": 3.1042, + "step": 3831 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003365781973472144, + "loss": 3.0907, + "step": 3832 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003363314157012185, + "loss": 3.2083, + "step": 3833 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003360846787023785, + "loss": 3.1847, + "step": 3834 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033583798641800166, + "loss": 3.2091, + "step": 3835 + }, + { + "epoch": 0.62, + "learning_rate": 0.000335591338915383, + "loss": 3.3147, + "step": 3836 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033534473626180587, + "loss": 3.2378, + "step": 3837 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033509817852454094, + "loss": 3.0241, + "step": 3838 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003348516657708466, + "loss": 3.2588, + "step": 3839 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033460519806796906, + "loss": 3.0929, + "step": 3840 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003343587754831424, + "loss": 3.2418, + "step": 3841 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033411239808358787, + "loss": 3.302, + "step": 3842 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033386606593651503, + "loss": 3.0182, + "step": 3843 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033361977910912103, + "loss": 3.1076, + "step": 3844 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003333735376685901, + "loss": 3.1627, + "step": 3845 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003331273416820947, + "loss": 3.0565, + "step": 3846 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003328811912167945, + "loss": 3.2785, + "step": 3847 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033263508633983677, + "loss": 3.1276, + "step": 3848 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003323890271183566, + "loss": 3.1734, + "step": 3849 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003321430136194766, + "loss": 3.3011, + "step": 3850 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033189704591030675, + "loss": 3.2495, + "step": 3851 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003316511240579445, + "loss": 3.139, + "step": 3852 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003314052481294748, + "loss": 3.3046, + "step": 3853 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033115941819197045, + "loss": 3.2697, + "step": 3854 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033091363431249094, + "loss": 3.0877, + "step": 3855 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033066789655808416, + "loss": 3.1205, + "step": 3856 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033042220499578454, + "loss": 3.1523, + "step": 3857 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003301765596926145, + "loss": 3.0972, + "step": 3858 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003299309607155835, + "loss": 2.9794, + "step": 3859 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003296854081316887, + "loss": 3.2037, + "step": 3860 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032943990200791395, + "loss": 3.1062, + "step": 3861 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032919444241123134, + "loss": 3.3388, + "step": 3862 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003289490294085996, + "loss": 3.3456, + "step": 3863 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032870366306696495, + "loss": 3.1253, + "step": 3864 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032845834345326085, + "loss": 3.0594, + "step": 3865 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003282130706344082, + "loss": 3.205, + "step": 3866 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032796784467731466, + "loss": 3.2603, + "step": 3867 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032772266564887566, + "loss": 3.3066, + "step": 3868 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032747753361597363, + "loss": 3.086, + "step": 3869 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003272324486454782, + "loss": 3.249, + "step": 3870 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032698741080424576, + "loss": 3.0652, + "step": 3871 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003267424201591205, + "loss": 3.218, + "step": 3872 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032649747677693307, + "loss": 3.12, + "step": 3873 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032625258072450203, + "loss": 3.1962, + "step": 3874 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032600773206863245, + "loss": 3.1178, + "step": 3875 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003257629308761164, + "loss": 3.239, + "step": 3876 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032551817721373333, + "loss": 3.1471, + "step": 3877 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003252734711482497, + "loss": 3.1145, + "step": 3878 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003250288127464186, + "loss": 3.2499, + "step": 3879 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003247842020749805, + "loss": 3.063, + "step": 3880 + }, + { + "epoch": 0.63, + "learning_rate": 0.000324539639200663, + "loss": 3.1301, + "step": 3881 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032429512419018027, + "loss": 3.2871, + "step": 3882 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003240506571102334, + "loss": 3.0852, + "step": 3883 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032380623802751073, + "loss": 3.2337, + "step": 3884 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032356186700868727, + "loss": 3.1967, + "step": 3885 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003233175441204249, + "loss": 3.2311, + "step": 3886 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003230732694293728, + "loss": 3.2081, + "step": 3887 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003228290430021664, + "loss": 3.1216, + "step": 3888 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032258486490542836, + "loss": 3.1377, + "step": 3889 + }, + { + "epoch": 0.63, + "learning_rate": 0.000322340735205768, + "loss": 3.1451, + "step": 3890 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003220966539697813, + "loss": 3.0131, + "step": 3891 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032185262126405113, + "loss": 3.2489, + "step": 3892 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032160863715514763, + "loss": 3.2848, + "step": 3893 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032136470170962686, + "loss": 3.3124, + "step": 3894 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003211208149940321, + "loss": 3.3503, + "step": 3895 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032087697707489327, + "loss": 3.198, + "step": 3896 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003206331880187267, + "loss": 3.1232, + "step": 3897 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003203894478920356, + "loss": 3.1928, + "step": 3898 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003201457567613102, + "loss": 3.2272, + "step": 3899 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003199021146930268, + "loss": 3.2764, + "step": 3900 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003196585217536485, + "loss": 3.1136, + "step": 3901 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031941497800962496, + "loss": 3.1489, + "step": 3902 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003191714835273927, + "loss": 3.0662, + "step": 3903 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031892803837337436, + "loss": 3.1778, + "step": 3904 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031868464261397924, + "loss": 3.0942, + "step": 3905 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003184412963156036, + "loss": 3.1304, + "step": 3906 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003181979995446298, + "loss": 3.1426, + "step": 3907 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031795475236742667, + "loss": 3.26, + "step": 3908 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031771155485034973, + "loss": 3.2953, + "step": 3909 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003174684070597408, + "loss": 3.2263, + "step": 3910 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003172253090619279, + "loss": 3.1705, + "step": 3911 + }, + { + "epoch": 0.63, + "learning_rate": 0.000316982260923226, + "loss": 3.072, + "step": 3912 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031673926270993634, + "loss": 3.2148, + "step": 3913 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031649631448834616, + "loss": 3.1464, + "step": 3914 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003162534163247295, + "loss": 3.0728, + "step": 3915 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031601056828534633, + "loss": 3.1059, + "step": 3916 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031576777043644316, + "loss": 3.319, + "step": 3917 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031552502284425306, + "loss": 3.195, + "step": 3918 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031528232557499514, + "loss": 3.0563, + "step": 3919 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031503967869487453, + "loss": 3.2843, + "step": 3920 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003147970822700832, + "loss": 3.1263, + "step": 3921 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031455453636679867, + "loss": 3.2415, + "step": 3922 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031431204105118515, + "loss": 3.0898, + "step": 3923 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003140695963893933, + "loss": 3.1328, + "step": 3924 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003138272024475593, + "loss": 3.2403, + "step": 3925 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003135848592918057, + "loss": 3.2497, + "step": 3926 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003133425669882416, + "loss": 3.2576, + "step": 3927 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031310032560296155, + "loss": 3.1809, + "step": 3928 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003128581352020469, + "loss": 3.1822, + "step": 3929 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031261599585156443, + "loss": 3.1691, + "step": 3930 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003123739076175678, + "loss": 3.2921, + "step": 3931 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003121318705660959, + "loss": 3.2874, + "step": 3932 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003118898847631742, + "loss": 3.2254, + "step": 3933 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031164795027481383, + "loss": 3.1937, + "step": 3934 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003114060671670124, + "loss": 3.0856, + "step": 3935 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003111642355057528, + "loss": 3.2967, + "step": 3936 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031092245535700464, + "loss": 3.1986, + "step": 3937 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003106807267867231, + "loss": 3.2297, + "step": 3938 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031043904986084926, + "loss": 3.1417, + "step": 3939 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031019742464531, + "loss": 3.0704, + "step": 3940 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030995585120601854, + "loss": 3.1322, + "step": 3941 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030971432960887334, + "loss": 3.1744, + "step": 3942 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003094728599197595, + "loss": 3.1892, + "step": 3943 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003092314422045474, + "loss": 2.9808, + "step": 3944 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030899007652909326, + "loss": 3.1168, + "step": 3945 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003087487629592393, + "loss": 3.0731, + "step": 3946 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003085075015608135, + "loss": 3.1754, + "step": 3947 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030826629239962943, + "loss": 3.2921, + "step": 3948 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030802513554148664, + "loss": 3.2581, + "step": 3949 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030778403105217046, + "loss": 3.1403, + "step": 3950 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003075429789974515, + "loss": 3.1663, + "step": 3951 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003073019794430866, + "loss": 3.1007, + "step": 3952 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003070610324548179, + "loss": 2.9963, + "step": 3953 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030682013809837325, + "loss": 3.2805, + "step": 3954 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003065792964394662, + "loss": 3.2961, + "step": 3955 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030633850754379635, + "loss": 3.2686, + "step": 3956 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030609777147704806, + "loss": 3.2565, + "step": 3957 + }, + { + "epoch": 0.64, + "learning_rate": 0.000305857088304892, + "loss": 3.2475, + "step": 3958 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003056164580929841, + "loss": 3.17, + "step": 3959 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003053758809069657, + "loss": 3.2915, + "step": 3960 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003051353568124638, + "loss": 3.0746, + "step": 3961 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003048948858750914, + "loss": 3.0767, + "step": 3962 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003046544681604462, + "loss": 3.2315, + "step": 3963 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030441410373411193, + "loss": 3.3243, + "step": 3964 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003041737926616576, + "loss": 3.1168, + "step": 3965 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030393353500863754, + "loss": 3.1595, + "step": 3966 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003036933308405915, + "loss": 3.2305, + "step": 3967 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003034531802230452, + "loss": 3.0983, + "step": 3968 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003032130832215091, + "loss": 3.1213, + "step": 3969 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003029730399014794, + "loss": 3.1739, + "step": 3970 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030273305032843724, + "loss": 3.0616, + "step": 3971 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030249311456784965, + "loss": 3.1848, + "step": 3972 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003022532326851685, + "loss": 3.2177, + "step": 3973 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030201340474583137, + "loss": 3.2138, + "step": 3974 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003017736308152608, + "loss": 3.2187, + "step": 3975 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003015339109588648, + "loss": 3.1413, + "step": 3976 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003012942452420364, + "loss": 3.0542, + "step": 3977 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030105463373015427, + "loss": 3.1738, + "step": 3978 + }, + { + "epoch": 0.64, + "learning_rate": 0.000300815076488582, + "loss": 3.3473, + "step": 3979 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030057557358266794, + "loss": 3.1101, + "step": 3980 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030033612507774667, + "loss": 3.1499, + "step": 3981 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003000967310391373, + "loss": 3.0242, + "step": 3982 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029985739153214373, + "loss": 3.1692, + "step": 3983 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002996181066220558, + "loss": 3.1013, + "step": 3984 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002993788763741479, + "loss": 3.1263, + "step": 3985 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002991397008536794, + "loss": 3.1486, + "step": 3986 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002989005801258954, + "loss": 3.3533, + "step": 3987 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002986615142560255, + "loss": 3.0696, + "step": 3988 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002984225033092844, + "loss": 3.2083, + "step": 3989 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002981835473508721, + "loss": 3.2213, + "step": 3990 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029794464644597305, + "loss": 3.1267, + "step": 3991 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002977058006597572, + "loss": 3.2145, + "step": 3992 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002974670100573795, + "loss": 3.1627, + "step": 3993 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029722827470397953, + "loss": 3.0899, + "step": 3994 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002969895946646818, + "loss": 3.1466, + "step": 3995 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029675097000459594, + "loss": 3.2256, + "step": 3996 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002965124007888163, + "loss": 3.1676, + "step": 3997 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029627388708242195, + "loss": 3.2112, + "step": 3998 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002960354289504776, + "loss": 3.1592, + "step": 3999 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002957970264580321, + "loss": 3.2051, + "step": 4000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029555867967011887, + "loss": 2.9887, + "step": 4001 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029532038865175695, + "loss": 3.1266, + "step": 4002 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002950821534679495, + "loss": 3.1663, + "step": 4003 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029484397418368493, + "loss": 3.1082, + "step": 4004 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029460585086393576, + "loss": 3.0419, + "step": 4005 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029436778357366014, + "loss": 3.1903, + "step": 4006 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029412977237780024, + "loss": 3.1059, + "step": 4007 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002938918173412832, + "loss": 3.2009, + "step": 4008 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002936539185290206, + "loss": 3.132, + "step": 4009 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002934160760059091, + "loss": 3.2861, + "step": 4010 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002931782898368294, + "loss": 3.2226, + "step": 4011 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002929405600866476, + "loss": 3.2351, + "step": 4012 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002927028868202139, + "loss": 3.0392, + "step": 4013 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002924652701023631, + "loss": 3.0767, + "step": 4014 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029222770999791473, + "loss": 3.0831, + "step": 4015 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002919902065716728, + "loss": 3.2114, + "step": 4016 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002917527598884256, + "loss": 3.3294, + "step": 4017 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002915153700129468, + "loss": 3.2263, + "step": 4018 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029127803700999355, + "loss": 3.2039, + "step": 4019 + }, + { + "epoch": 0.65, + "learning_rate": 0.000291040760944308, + "loss": 3.2107, + "step": 4020 + }, + { + "epoch": 0.65, + "learning_rate": 0.000290803541880617, + "loss": 3.2709, + "step": 4021 + }, + { + "epoch": 0.65, + "learning_rate": 0.000290566379883631, + "loss": 3.13, + "step": 4022 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029032927501804553, + "loss": 3.2467, + "step": 4023 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002900922273485409, + "loss": 3.2125, + "step": 4024 + }, + { + "epoch": 0.65, + "learning_rate": 0.000289855236939781, + "loss": 3.0606, + "step": 4025 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002896183038564144, + "loss": 3.146, + "step": 4026 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002893814281630744, + "loss": 3.335, + "step": 4027 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028914460992437784, + "loss": 3.2915, + "step": 4028 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028890784920492673, + "loss": 3.148, + "step": 4029 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002886711460693069, + "loss": 3.1647, + "step": 4030 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028843450058208865, + "loss": 3.1223, + "step": 4031 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002881979128078264, + "loss": 3.1942, + "step": 4032 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002879613828110591, + "loss": 3.2868, + "step": 4033 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002877249106563099, + "loss": 3.1564, + "step": 4034 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002874884964080856, + "loss": 3.3212, + "step": 4035 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028725214013087787, + "loss": 3.1215, + "step": 4036 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028701584188916234, + "loss": 3.1798, + "step": 4037 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002867796017473989, + "loss": 3.2662, + "step": 4038 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002865434197700314, + "loss": 3.0221, + "step": 4039 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028630729602148816, + "loss": 3.2844, + "step": 4040 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028607123056618094, + "loss": 3.268, + "step": 4041 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002858352234685063, + "loss": 3.0573, + "step": 4042 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002855992747928446, + "loss": 3.2745, + "step": 4043 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002853633846035603, + "loss": 3.1147, + "step": 4044 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002851275529650018, + "loss": 3.1835, + "step": 4045 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028489177994150196, + "loss": 3.1462, + "step": 4046 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028465606559737675, + "loss": 3.3475, + "step": 4047 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028442040999692705, + "loss": 3.252, + "step": 4048 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002841848132044372, + "loss": 3.1954, + "step": 4049 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002839492752841758, + "loss": 3.2729, + "step": 4050 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002837137963003952, + "loss": 3.0788, + "step": 4051 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002834783763173318, + "loss": 3.2465, + "step": 4052 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002832430153992055, + "loss": 3.1525, + "step": 4053 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002830077136102207, + "loss": 3.2683, + "step": 4054 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002827724710145653, + "loss": 3.122, + "step": 4055 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028253728767641104, + "loss": 3.1916, + "step": 4056 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002823021636599137, + "loss": 3.025, + "step": 4057 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028206709902921294, + "loss": 3.2628, + "step": 4058 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028183209384843167, + "loss": 3.2141, + "step": 4059 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028159714818167713, + "loss": 3.1428, + "step": 4060 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028136226209304015, + "loss": 3.258, + "step": 4061 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028112743564659534, + "loss": 2.9183, + "step": 4062 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002808926689064009, + "loss": 3.0787, + "step": 4063 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028065796193649917, + "loss": 3.0015, + "step": 4064 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002804233148009155, + "loss": 3.1877, + "step": 4065 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002801887275636594, + "loss": 3.2151, + "step": 4066 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002799542002887239, + "loss": 3.2113, + "step": 4067 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002797197330400858, + "loss": 3.0123, + "step": 4068 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002794853258817053, + "loss": 3.1978, + "step": 4069 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027925097887752666, + "loss": 3.2574, + "step": 4070 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002790166920914769, + "loss": 3.0455, + "step": 4071 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002787824655874674, + "loss": 3.13, + "step": 4072 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027854829942939273, + "loss": 3.174, + "step": 4073 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002783141936811312, + "loss": 3.2969, + "step": 4074 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027808014840654437, + "loss": 3.2942, + "step": 4075 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002778461636694778, + "loss": 3.0967, + "step": 4076 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002776122395337597, + "loss": 3.1659, + "step": 4077 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027737837606320244, + "loss": 3.2761, + "step": 4078 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002771445733216017, + "loss": 3.2108, + "step": 4079 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027691083137273645, + "loss": 3.1669, + "step": 4080 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002766771502803692, + "loss": 3.2361, + "step": 4081 + }, + { + "epoch": 0.66, + "learning_rate": 0.000276443530108246, + "loss": 3.1721, + "step": 4082 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002762099709200958, + "loss": 3.2816, + "step": 4083 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002759764727796313, + "loss": 3.1641, + "step": 4084 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027574303575054847, + "loss": 3.1325, + "step": 4085 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027550965989652664, + "loss": 3.0987, + "step": 4086 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002752763452812285, + "loss": 3.2683, + "step": 4087 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027504309196829966, + "loss": 3.2042, + "step": 4088 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027480990002136987, + "loss": 3.1936, + "step": 4089 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002745767695040509, + "loss": 3.2831, + "step": 4090 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002743437004799387, + "loss": 3.1679, + "step": 4091 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027411069301261213, + "loss": 3.1541, + "step": 4092 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027387774716563346, + "loss": 3.1048, + "step": 4093 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027364486300254787, + "loss": 3.2002, + "step": 4094 + }, + { + "epoch": 0.66, + "learning_rate": 0.000273412040586884, + "loss": 3.1037, + "step": 4095 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002731792799821532, + "loss": 3.1418, + "step": 4096 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002729465812518503, + "loss": 3.3329, + "step": 4097 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027271394445945346, + "loss": 3.0457, + "step": 4098 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002724813696684231, + "loss": 3.2609, + "step": 4099 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002722488569422039, + "loss": 3.0507, + "step": 4100 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002720164063442229, + "loss": 3.0593, + "step": 4101 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002717840179378901, + "loss": 3.1612, + "step": 4102 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027155169178659874, + "loss": 3.3641, + "step": 4103 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027131942795372536, + "loss": 3.2591, + "step": 4104 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002710872265026286, + "loss": 3.1082, + "step": 4105 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027085508749665144, + "loss": 3.2559, + "step": 4106 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002706230109991188, + "loss": 3.0895, + "step": 4107 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002703909970733387, + "loss": 3.3199, + "step": 4108 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002701590457826023, + "loss": 3.0681, + "step": 4109 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002699271571901837, + "loss": 3.1658, + "step": 4110 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026969533135933946, + "loss": 3.0492, + "step": 4111 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002694635683533096, + "loss": 3.1969, + "step": 4112 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026923186823531707, + "loss": 3.1702, + "step": 4113 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002690002310685669, + "loss": 3.1702, + "step": 4114 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002687686569162474, + "loss": 2.9403, + "step": 4115 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002685371458415298, + "loss": 3.1884, + "step": 4116 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026830569790756804, + "loss": 3.1893, + "step": 4117 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002680743131774987, + "loss": 3.1849, + "step": 4118 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002678429917144417, + "loss": 3.1613, + "step": 4119 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002676117335814985, + "loss": 3.1853, + "step": 4120 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026738053884175437, + "loss": 3.2112, + "step": 4121 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026714940755827695, + "loss": 3.1344, + "step": 4122 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002669183397941166, + "loss": 3.2412, + "step": 4123 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002666873356123059, + "loss": 3.2354, + "step": 4124 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002664563950758611, + "loss": 3.1895, + "step": 4125 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026622551824778, + "loss": 3.2522, + "step": 4126 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002659947051910436, + "loss": 3.2679, + "step": 4127 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026576395596861554, + "loss": 3.1023, + "step": 4128 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002655332706434419, + "loss": 3.1887, + "step": 4129 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002653026492784509, + "loss": 2.9679, + "step": 4130 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026507209193655444, + "loss": 3.2127, + "step": 4131 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026484159868064584, + "loss": 3.0054, + "step": 4132 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002646111695736013, + "loss": 3.3069, + "step": 4133 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002643808046782797, + "loss": 3.2137, + "step": 4134 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002641505040575226, + "loss": 3.0031, + "step": 4135 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002639202677741529, + "loss": 3.1172, + "step": 4136 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002636900958909776, + "loss": 3.1372, + "step": 4137 + }, + { + "epoch": 0.67, + "learning_rate": 0.000263459988470785, + "loss": 3.0422, + "step": 4138 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002632299455763459, + "loss": 3.3007, + "step": 4139 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002629999672704139, + "loss": 2.9857, + "step": 4140 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026277005361572493, + "loss": 3.0287, + "step": 4141 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026254020467499664, + "loss": 3.0796, + "step": 4142 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002623104205109299, + "loss": 3.1754, + "step": 4143 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026208070118620777, + "loss": 2.9953, + "step": 4144 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026185104676349494, + "loss": 3.1351, + "step": 4145 + }, + { + "epoch": 0.67, + "learning_rate": 0.000261621457305439, + "loss": 3.2094, + "step": 4146 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002613919328746698, + "loss": 3.2105, + "step": 4147 + }, + { + "epoch": 0.67, + "learning_rate": 0.000261162473533799, + "loss": 3.2661, + "step": 4148 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002609330793454208, + "loss": 3.1949, + "step": 4149 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026070375037211225, + "loss": 3.2443, + "step": 4150 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002604744866764314, + "loss": 3.2915, + "step": 4151 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026024528832091926, + "loss": 3.0836, + "step": 4152 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026001615536809915, + "loss": 3.1686, + "step": 4153 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002597870878804758, + "loss": 3.1607, + "step": 4154 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025955808592053643, + "loss": 3.179, + "step": 4155 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025932914955075127, + "loss": 3.1991, + "step": 4156 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002591002788335711, + "loss": 3.27, + "step": 4157 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025887147383142997, + "loss": 3.1548, + "step": 4158 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002586427346067436, + "loss": 3.1206, + "step": 4159 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002584140612219095, + "loss": 3.2813, + "step": 4160 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025818545373930743, + "loss": 3.2768, + "step": 4161 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002579569122212998, + "loss": 3.1401, + "step": 4162 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002577284367302299, + "loss": 3.4463, + "step": 4163 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025750002732842384, + "loss": 3.3511, + "step": 4164 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002572716840781894, + "loss": 3.0053, + "step": 4165 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025704340704181614, + "loss": 3.0359, + "step": 4166 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002568151962815757, + "loss": 3.0519, + "step": 4167 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002565870518597223, + "loss": 3.1631, + "step": 4168 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025635897383849095, + "loss": 3.0362, + "step": 4169 + }, + { + "epoch": 0.67, + "learning_rate": 0.000256130962280099, + "loss": 3.242, + "step": 4170 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025590301724674595, + "loss": 3.0901, + "step": 4171 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002556751388006131, + "loss": 3.1802, + "step": 4172 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002554473270038629, + "loss": 3.2809, + "step": 4173 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002552195819186405, + "loss": 3.1039, + "step": 4174 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025499190360707234, + "loss": 3.05, + "step": 4175 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002547642921312669, + "loss": 3.1561, + "step": 4176 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025453674755331426, + "loss": 3.1667, + "step": 4177 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025430926993528645, + "loss": 3.1744, + "step": 4178 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002540818593392369, + "loss": 3.2322, + "step": 4179 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025385451582720086, + "loss": 3.1559, + "step": 4180 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002536272394611956, + "loss": 3.1086, + "step": 4181 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025340003030321977, + "loss": 3.0673, + "step": 4182 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025317288841525377, + "loss": 3.1716, + "step": 4183 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025294581385925974, + "loss": 3.2887, + "step": 4184 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002527188066971811, + "loss": 3.1829, + "step": 4185 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025249186699094326, + "loss": 3.0651, + "step": 4186 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002522649948024531, + "loss": 3.1439, + "step": 4187 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002520381901935992, + "loss": 3.1099, + "step": 4188 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025181145322625146, + "loss": 3.1224, + "step": 4189 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002515847839622617, + "loss": 3.0274, + "step": 4190 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002513581824634626, + "loss": 3.1903, + "step": 4191 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025113164879166886, + "loss": 3.2002, + "step": 4192 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025090518300867717, + "loss": 3.1281, + "step": 4193 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025067878517626445, + "loss": 3.1395, + "step": 4194 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002504524553561901, + "loss": 3.2152, + "step": 4195 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002502261936101948, + "loss": 3.1826, + "step": 4196 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002500000000000001, + "loss": 3.2082, + "step": 4197 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024977387458730954, + "loss": 3.1214, + "step": 4198 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024954781743380785, + "loss": 3.1125, + "step": 4199 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024932182860116115, + "loss": 3.0731, + "step": 4200 + }, + { + "epoch": 0.68, + "learning_rate": 0.000249095908151017, + "loss": 2.8992, + "step": 4201 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024887005614500444, + "loss": 3.2321, + "step": 4202 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002486442726447332, + "loss": 3.1698, + "step": 4203 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002484185577117951, + "loss": 3.2945, + "step": 4204 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002481929114077626, + "loss": 3.2349, + "step": 4205 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024796733379419013, + "loss": 3.2539, + "step": 4206 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002477418249326128, + "loss": 3.1775, + "step": 4207 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002475163848845474, + "loss": 3.1465, + "step": 4208 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002472910137114914, + "loss": 3.2704, + "step": 4209 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002470657114749238, + "loss": 3.1569, + "step": 4210 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002468404782363051, + "loss": 3.0388, + "step": 4211 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024661531405707656, + "loss": 3.1313, + "step": 4212 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002463902189986606, + "loss": 3.2158, + "step": 4213 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024616519312246123, + "loss": 3.2347, + "step": 4214 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024594023648986284, + "loss": 3.1266, + "step": 4215 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024571534916223155, + "loss": 3.2098, + "step": 4216 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002454905312009144, + "loss": 3.2493, + "step": 4217 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002452657826672394, + "loss": 3.0256, + "step": 4218 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002450411036225158, + "loss": 3.2188, + "step": 4219 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002448164941280337, + "loss": 3.1295, + "step": 4220 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024459195424506464, + "loss": 3.1127, + "step": 4221 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024436748403486037, + "loss": 3.1494, + "step": 4222 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002441430835586544, + "loss": 3.1721, + "step": 4223 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002439187528776609, + "loss": 3.0297, + "step": 4224 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024369449205307504, + "loss": 3.2309, + "step": 4225 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024347030114607295, + "loss": 3.0079, + "step": 4226 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024324618021781186, + "loss": 3.052, + "step": 4227 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002430221293294293, + "loss": 3.2588, + "step": 4228 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024279814854204435, + "loss": 3.2389, + "step": 4229 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002425742379167567, + "loss": 3.1093, + "step": 4230 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024235039751464694, + "loss": 3.3073, + "step": 4231 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024212662739677655, + "loss": 3.0591, + "step": 4232 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024190292762418786, + "loss": 3.1953, + "step": 4233 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002416792982579037, + "loss": 3.3459, + "step": 4234 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024145573935892802, + "loss": 3.2357, + "step": 4235 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024123225098824548, + "loss": 3.0579, + "step": 4236 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024100883320682148, + "loss": 3.1938, + "step": 4237 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024078548607560214, + "loss": 3.2051, + "step": 4238 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024056220965551457, + "loss": 3.1117, + "step": 4239 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024033900400746589, + "loss": 3.0689, + "step": 4240 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024011586919234462, + "loss": 3.1101, + "step": 4241 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002398928052710197, + "loss": 3.0901, + "step": 4242 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002396698123043407, + "loss": 3.1744, + "step": 4243 + }, + { + "epoch": 0.68, + "learning_rate": 0.000239446890353138, + "loss": 3.227, + "step": 4244 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023922403947822252, + "loss": 3.3615, + "step": 4245 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023900125974038545, + "loss": 3.2235, + "step": 4246 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023877855120039905, + "loss": 3.2234, + "step": 4247 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002385559139190162, + "loss": 3.4154, + "step": 4248 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023833334795696955, + "loss": 3.2047, + "step": 4249 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002381108533749734, + "loss": 3.2145, + "step": 4250 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023788843023372209, + "loss": 3.1631, + "step": 4251 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023766607859389006, + "loss": 3.1779, + "step": 4252 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002374437985161328, + "loss": 3.171, + "step": 4253 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023722159006108606, + "loss": 3.3265, + "step": 4254 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002369994532893661, + "loss": 3.2081, + "step": 4255 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002367773882615697, + "loss": 3.1658, + "step": 4256 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023655539503827407, + "loss": 3.0066, + "step": 4257 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023633347368003639, + "loss": 3.1828, + "step": 4258 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002361116242473948, + "loss": 3.2217, + "step": 4259 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023588984680086755, + "loss": 3.2927, + "step": 4260 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023566814140095344, + "loss": 3.1804, + "step": 4261 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023544650810813138, + "loss": 3.2444, + "step": 4262 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023522494698286097, + "loss": 2.9836, + "step": 4263 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023500345808558144, + "loss": 3.2735, + "step": 4264 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023478204147671294, + "loss": 3.2904, + "step": 4265 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002345606972166558, + "loss": 3.1513, + "step": 4266 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023433942536579038, + "loss": 3.2664, + "step": 4267 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023411822598447756, + "loss": 3.2827, + "step": 4268 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002338970991330585, + "loss": 3.0973, + "step": 4269 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023367604487185394, + "loss": 3.0368, + "step": 4270 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002334550632611655, + "loss": 3.0457, + "step": 4271 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023323415436127482, + "loss": 3.082, + "step": 4272 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002330133182324437, + "loss": 3.153, + "step": 4273 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002327925549349136, + "loss": 3.0703, + "step": 4274 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023257186452890706, + "loss": 3.1036, + "step": 4275 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002323512470746262, + "loss": 3.3104, + "step": 4276 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023213070263225282, + "loss": 3.0323, + "step": 4277 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023191023126194955, + "loss": 3.063, + "step": 4278 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023168983302385894, + "loss": 3.0741, + "step": 4279 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023146950797810285, + "loss": 3.0996, + "step": 4280 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023124925618478432, + "loss": 3.1348, + "step": 4281 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002310290777039858, + "loss": 3.1846, + "step": 4282 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023080897259576943, + "loss": 3.1313, + "step": 4283 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002305889409201779, + "loss": 3.1428, + "step": 4284 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023036898273723382, + "loss": 3.228, + "step": 4285 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023014909810693907, + "loss": 3.0927, + "step": 4286 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022992928708927645, + "loss": 3.1049, + "step": 4287 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022970954974420828, + "loss": 3.051, + "step": 4288 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022948988613167632, + "loss": 3.287, + "step": 4289 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022927029631160278, + "loss": 2.9917, + "step": 4290 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022905078034388983, + "loss": 3.2523, + "step": 4291 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022883133828841858, + "loss": 3.1499, + "step": 4292 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022861197020505126, + "loss": 3.0912, + "step": 4293 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022839267615362924, + "loss": 3.1126, + "step": 4294 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022817345619397338, + "loss": 3.1517, + "step": 4295 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022795431038588498, + "loss": 3.1545, + "step": 4296 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022773523878914494, + "loss": 3.2291, + "step": 4297 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022751624146351336, + "loss": 3.2285, + "step": 4298 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022729731846873064, + "loss": 3.1711, + "step": 4299 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002270784698645173, + "loss": 3.1627, + "step": 4300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022685969571057248, + "loss": 3.1063, + "step": 4301 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022664099606657578, + "loss": 3.3439, + "step": 4302 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022642237099218648, + "loss": 3.0449, + "step": 4303 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022620382054704298, + "loss": 3.0033, + "step": 4304 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002259853447907636, + "loss": 3.1899, + "step": 4305 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002257669437829469, + "loss": 3.1683, + "step": 4306 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022554861758316996, + "loss": 3.2902, + "step": 4307 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002253303662509902, + "loss": 3.1518, + "step": 4308 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022511218984594438, + "loss": 3.1665, + "step": 4309 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022489408842754898, + "loss": 3.057, + "step": 4310 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022467606205529945, + "loss": 3.3021, + "step": 4311 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022445811078867185, + "loss": 3.2694, + "step": 4312 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022424023468712058, + "loss": 3.2542, + "step": 4313 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022402243381008024, + "loss": 3.1886, + "step": 4314 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022380470821696476, + "loss": 3.1301, + "step": 4315 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002235870579671677, + "loss": 3.1813, + "step": 4316 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022336948312006127, + "loss": 3.2524, + "step": 4317 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002231519837349985, + "loss": 3.2157, + "step": 4318 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022293455987131052, + "loss": 3.0789, + "step": 4319 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022271721158830855, + "loss": 3.061, + "step": 4320 + }, + { + "epoch": 0.7, + "learning_rate": 0.000222499938945283, + "loss": 3.0671, + "step": 4321 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022228274200150383, + "loss": 3.1777, + "step": 4322 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022206562081621996, + "loss": 3.1716, + "step": 4323 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022184857544865994, + "loss": 3.1554, + "step": 4324 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002216316059580316, + "loss": 3.1727, + "step": 4325 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022141471240352212, + "loss": 3.0826, + "step": 4326 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022119789484429785, + "loss": 3.1011, + "step": 4327 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002209811533395047, + "loss": 3.1726, + "step": 4328 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022076448794826708, + "loss": 3.2459, + "step": 4329 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022054789872968928, + "loss": 3.1833, + "step": 4330 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022033138574285515, + "loss": 3.1993, + "step": 4331 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022011494904682682, + "loss": 3.1781, + "step": 4332 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002198985887006461, + "loss": 3.1976, + "step": 4333 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021968230476333424, + "loss": 3.1598, + "step": 4334 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021946609729389088, + "loss": 3.0296, + "step": 4335 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002192499663512953, + "loss": 3.1073, + "step": 4336 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002190339119945064, + "loss": 3.2129, + "step": 4337 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021881793428246116, + "loss": 3.0633, + "step": 4338 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021860203327407624, + "loss": 3.1796, + "step": 4339 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021838620902824758, + "loss": 3.1979, + "step": 4340 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021817046160384934, + "loss": 3.0947, + "step": 4341 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021795479105973542, + "loss": 3.133, + "step": 4342 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021773919745473908, + "loss": 3.2023, + "step": 4343 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021752368084767155, + "loss": 3.2168, + "step": 4344 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002173082412973238, + "loss": 3.1128, + "step": 4345 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021709287886246577, + "loss": 3.357, + "step": 4346 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021687759360184577, + "loss": 3.208, + "step": 4347 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021666238557419178, + "loss": 3.1786, + "step": 4348 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021644725483821025, + "loss": 3.1689, + "step": 4349 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021623220145258683, + "loss": 3.0795, + "step": 4350 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002160172254759858, + "loss": 3.1103, + "step": 4351 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021580232696705076, + "loss": 3.2286, + "step": 4352 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021558750598440346, + "loss": 3.2055, + "step": 4353 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021537276258664511, + "loss": 3.1061, + "step": 4354 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002151580968323556, + "loss": 3.1011, + "step": 4355 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002149435087800935, + "loss": 3.1195, + "step": 4356 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021472899848839645, + "loss": 3.3282, + "step": 4357 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021451456601578056, + "loss": 3.0536, + "step": 4358 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021430021142074113, + "loss": 3.302, + "step": 4359 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002140859347617516, + "loss": 2.993, + "step": 4360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021387173609726463, + "loss": 3.0391, + "step": 4361 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002136576154857115, + "loss": 3.2748, + "step": 4362 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002134435729855022, + "loss": 3.2154, + "step": 4363 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021322960865502533, + "loss": 3.1719, + "step": 4364 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002130157225526485, + "loss": 3.1404, + "step": 4365 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002128019147367173, + "loss": 3.2187, + "step": 4366 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021258818526555647, + "loss": 3.3178, + "step": 4367 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021237453419746934, + "loss": 3.1361, + "step": 4368 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021216096159073784, + "loss": 3.0917, + "step": 4369 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021194746750362236, + "loss": 3.2578, + "step": 4370 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021173405199436217, + "loss": 3.1958, + "step": 4371 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021152071512117455, + "loss": 3.0908, + "step": 4372 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021130745694225578, + "loss": 3.1061, + "step": 4373 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021109427751578064, + "loss": 3.1406, + "step": 4374 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021088117689990234, + "loss": 2.9409, + "step": 4375 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021066815515275255, + "loss": 3.176, + "step": 4376 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021045521233244169, + "loss": 3.2612, + "step": 4377 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021024234849705809, + "loss": 3.2818, + "step": 4378 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021002956370466902, + "loss": 3.1456, + "step": 4379 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020981685801332013, + "loss": 3.0804, + "step": 4380 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020960423148103525, + "loss": 3.2385, + "step": 4381 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020939168416581695, + "loss": 3.1754, + "step": 4382 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020917921612564606, + "loss": 3.1608, + "step": 4383 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020896682741848143, + "loss": 3.1386, + "step": 4384 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020875451810226081, + "loss": 3.1207, + "step": 4385 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020854228823490001, + "loss": 3.2492, + "step": 4386 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020833013787429323, + "loss": 3.2126, + "step": 4387 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020811806707831299, + "loss": 3.202, + "step": 4388 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020790607590481019, + "loss": 3.202, + "step": 4389 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020769416441161366, + "loss": 3.2607, + "step": 4390 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020748233265653084, + "loss": 3.1455, + "step": 4391 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002072705806973473, + "loss": 3.1702, + "step": 4392 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020705890859182692, + "loss": 3.1232, + "step": 4393 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002068473163977117, + "loss": 3.2072, + "step": 4394 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002066358041727221, + "loss": 3.213, + "step": 4395 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020642437197455617, + "loss": 3.1671, + "step": 4396 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020621301986089064, + "loss": 3.1399, + "step": 4397 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020600174788938026, + "loss": 3.1052, + "step": 4398 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020579055611765797, + "loss": 3.1146, + "step": 4399 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020557944460333478, + "loss": 3.1345, + "step": 4400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020536841340399988, + "loss": 3.0773, + "step": 4401 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020515746257722024, + "loss": 3.1409, + "step": 4402 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020494659218054124, + "loss": 3.009, + "step": 4403 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020473580227148625, + "loss": 3.301, + "step": 4404 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020452509290755667, + "loss": 3.0811, + "step": 4405 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020431446414623185, + "loss": 3.1263, + "step": 4406 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020410391604496947, + "loss": 3.2726, + "step": 4407 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002038934486612045, + "loss": 3.1985, + "step": 4408 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002036830620523506, + "loss": 3.217, + "step": 4409 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020347275627579915, + "loss": 3.0197, + "step": 4410 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020326253138891936, + "loss": 3.1509, + "step": 4411 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020305238744905857, + "loss": 3.2235, + "step": 4412 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020284232451354189, + "loss": 3.056, + "step": 4413 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020263234263967266, + "loss": 3.1263, + "step": 4414 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020242244188473142, + "loss": 3.051, + "step": 4415 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020221262230597716, + "loss": 3.2285, + "step": 4416 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020200288396064658, + "loss": 3.3223, + "step": 4417 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020179322690595426, + "loss": 3.3599, + "step": 4418 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002015836511990925, + "loss": 3.2719, + "step": 4419 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002013741568972316, + "loss": 3.2008, + "step": 4420 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020116474405751922, + "loss": 3.0324, + "step": 4421 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020095541273708128, + "loss": 3.2074, + "step": 4422 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020074616299302144, + "loss": 3.0828, + "step": 4423 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002005369948824204, + "loss": 3.3061, + "step": 4424 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020032790846233762, + "loss": 3.1345, + "step": 4425 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020011890378980983, + "loss": 3.1573, + "step": 4426 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019990998092185108, + "loss": 3.1563, + "step": 4427 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019970113991545364, + "loss": 3.1301, + "step": 4428 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019949238082758735, + "loss": 3.1447, + "step": 4429 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001992837037151991, + "loss": 3.1626, + "step": 4430 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019907510863521449, + "loss": 3.1769, + "step": 4431 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019886659564453618, + "loss": 3.037, + "step": 4432 + }, + { + "epoch": 0.71, + "learning_rate": 0.000198658164800044, + "loss": 3.1764, + "step": 4433 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001984498161585961, + "loss": 3.0076, + "step": 4434 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019824154977702795, + "loss": 3.2423, + "step": 4435 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019803336571215212, + "loss": 3.1217, + "step": 4436 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019782526402075963, + "loss": 3.1258, + "step": 4437 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001976172447596185, + "loss": 3.3219, + "step": 4438 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019740930798547407, + "loss": 3.1885, + "step": 4439 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001972014537550495, + "loss": 3.1222, + "step": 4440 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019699368212504554, + "loss": 3.0396, + "step": 4441 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019678599315213968, + "loss": 3.2, + "step": 4442 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019657838689298796, + "loss": 3.1946, + "step": 4443 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019637086340422327, + "loss": 3.1946, + "step": 4444 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019616342274245564, + "loss": 2.9829, + "step": 4445 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019595606496427293, + "loss": 3.2215, + "step": 4446 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001957487901262403, + "loss": 3.1195, + "step": 4447 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001955415982849004, + "loss": 3.1998, + "step": 4448 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019533448949677262, + "loss": 3.2898, + "step": 4449 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019512746381835483, + "loss": 3.1303, + "step": 4450 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019492052130612103, + "loss": 3.1352, + "step": 4451 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019471366201652328, + "loss": 3.2404, + "step": 4452 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019450688600599077, + "loss": 3.3935, + "step": 4453 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019430019333093002, + "loss": 3.1514, + "step": 4454 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019409358404772425, + "loss": 3.2582, + "step": 4455 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019388705821273512, + "loss": 3.2168, + "step": 4456 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001936806158823003, + "loss": 3.1718, + "step": 4457 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019347425711273525, + "loss": 3.0814, + "step": 4458 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019326798196033274, + "loss": 3.1877, + "step": 4459 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001930617904813627, + "loss": 3.2138, + "step": 4460 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019285568273207156, + "loss": 3.2122, + "step": 4461 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019264965876868395, + "loss": 3.1165, + "step": 4462 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019244371864740118, + "loss": 3.1591, + "step": 4463 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001922378624244014, + "loss": 3.1526, + "step": 4464 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019203209015584012, + "loss": 3.3269, + "step": 4465 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001918264018978503, + "loss": 3.1421, + "step": 4466 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019162079770654105, + "loss": 3.1378, + "step": 4467 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019141527763799965, + "loss": 3.0, + "step": 4468 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019120984174828999, + "loss": 3.3038, + "step": 4469 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001910044900934525, + "loss": 3.0214, + "step": 4470 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001907992227295053, + "loss": 3.158, + "step": 4471 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001905940397124434, + "loss": 2.9944, + "step": 4472 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019038894109823834, + "loss": 3.2414, + "step": 4473 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019018392694283905, + "loss": 3.1629, + "step": 4474 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018997899730217177, + "loss": 3.3381, + "step": 4475 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001897741522321388, + "loss": 3.0663, + "step": 4476 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018956939178861994, + "loss": 3.0867, + "step": 4477 + }, + { + "epoch": 0.72, + "learning_rate": 0.000189364716027472, + "loss": 3.3121, + "step": 4478 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018916012500452817, + "loss": 3.1079, + "step": 4479 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001889556187755988, + "loss": 3.3009, + "step": 4480 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018875119739647172, + "loss": 3.1446, + "step": 4481 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018854686092291046, + "loss": 3.2041, + "step": 4482 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018834260941065611, + "loss": 3.1649, + "step": 4483 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018813844291542677, + "loss": 3.1978, + "step": 4484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018793436149291647, + "loss": 3.1597, + "step": 4485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018773036519879676, + "loss": 3.1231, + "step": 4486 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018752645408871626, + "loss": 3.292, + "step": 4487 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018732262821829931, + "loss": 3.1241, + "step": 4488 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018711888764314777, + "loss": 3.1956, + "step": 4489 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018691523241884029, + "loss": 3.1702, + "step": 4490 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018671166260093147, + "loss": 2.9963, + "step": 4491 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001865081782449533, + "loss": 3.2701, + "step": 4492 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018630477940641472, + "loss": 3.2058, + "step": 4493 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018610146614080037, + "loss": 3.0433, + "step": 4494 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018589823850357224, + "loss": 3.0629, + "step": 4495 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018569509655016886, + "loss": 3.2751, + "step": 4496 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018549204033600547, + "loss": 3.2882, + "step": 4497 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018528906991647343, + "loss": 3.2119, + "step": 4498 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018508618534694121, + "loss": 3.1328, + "step": 4499 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018488338668275369, + "loss": 3.1851, + "step": 4500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018468067397923231, + "loss": 3.1355, + "step": 4501 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018447804729167518, + "loss": 3.1353, + "step": 4502 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018427550667535686, + "loss": 3.0431, + "step": 4503 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018407305218552815, + "loss": 3.0502, + "step": 4504 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018387068387741679, + "loss": 3.0584, + "step": 4505 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018366840180622675, + "loss": 3.2285, + "step": 4506 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018346620602713865, + "loss": 3.0916, + "step": 4507 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018326409659530945, + "loss": 3.1314, + "step": 4508 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018306207356587274, + "loss": 3.3056, + "step": 4509 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018286013699393805, + "loss": 3.2546, + "step": 4510 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001826582869345919, + "loss": 3.2522, + "step": 4511 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018245652344289686, + "loss": 3.1361, + "step": 4512 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018225484657389206, + "loss": 3.2023, + "step": 4513 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001820532563825929, + "loss": 3.1521, + "step": 4514 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018185175292399143, + "loss": 3.0769, + "step": 4515 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018165033625305526, + "loss": 3.104, + "step": 4516 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018144900642472905, + "loss": 3.1297, + "step": 4517 + }, + { + "epoch": 0.73, + "learning_rate": 0.000181247763493934, + "loss": 3.0331, + "step": 4518 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001810466075155666, + "loss": 3.1865, + "step": 4519 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018084553854450048, + "loss": 2.997, + "step": 4520 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018064455663558532, + "loss": 3.3253, + "step": 4521 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001804436618436467, + "loss": 3.2662, + "step": 4522 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018024285422348679, + "loss": 3.2048, + "step": 4523 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018004213382988405, + "loss": 3.1431, + "step": 4524 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017984150071759288, + "loss": 3.0896, + "step": 4525 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017964095494134403, + "loss": 3.113, + "step": 4526 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017944049655584454, + "loss": 3.1699, + "step": 4527 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017924012561577714, + "loss": 3.3035, + "step": 4528 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017903984217580116, + "loss": 3.225, + "step": 4529 + }, + { + "epoch": 0.73, + "learning_rate": 0.000178839646290552, + "loss": 3.1091, + "step": 4530 + }, + { + "epoch": 0.73, + "learning_rate": 0.000178639538014641, + "loss": 3.1416, + "step": 4531 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017843951740265578, + "loss": 3.1746, + "step": 4532 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001782395845091601, + "loss": 3.1843, + "step": 4533 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001780397393886933, + "loss": 3.2838, + "step": 4534 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017783998209577135, + "loss": 3.1648, + "step": 4535 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017764031268488595, + "loss": 3.1085, + "step": 4536 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017744073121050508, + "loss": 3.1781, + "step": 4537 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001772412377270724, + "loss": 3.2897, + "step": 4538 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017704183228900805, + "loss": 3.1062, + "step": 4539 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001768425149507074, + "loss": 3.1701, + "step": 4540 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001766432857665425, + "loss": 3.1701, + "step": 4541 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017644414479086102, + "loss": 3.1457, + "step": 4542 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017624509207798662, + "loss": 3.0751, + "step": 4543 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001760461276822189, + "loss": 3.1069, + "step": 4544 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017584725165783354, + "loss": 3.1885, + "step": 4545 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017564846405908164, + "loss": 3.0473, + "step": 4546 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017544976494019056, + "loss": 3.057, + "step": 4547 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017525115435536347, + "loss": 3.2509, + "step": 4548 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017505263235877944, + "loss": 3.1918, + "step": 4549 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001748541990045931, + "loss": 3.0851, + "step": 4550 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017465585434693525, + "loss": 3.047, + "step": 4551 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017445759843991244, + "loss": 3.2151, + "step": 4552 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017425943133760657, + "loss": 3.0513, + "step": 4553 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017406135309407583, + "loss": 3.148, + "step": 4554 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017386336376335398, + "loss": 3.1805, + "step": 4555 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001736654633994505, + "loss": 3.1815, + "step": 4556 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017346765205635068, + "loss": 3.1679, + "step": 4557 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017326992978801563, + "loss": 3.1974, + "step": 4558 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001730722966483817, + "loss": 3.1465, + "step": 4559 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017287475269136133, + "loss": 3.1986, + "step": 4560 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017267729797084265, + "loss": 3.129, + "step": 4561 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017247993254068921, + "loss": 3.1412, + "step": 4562 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001722826564547404, + "loss": 3.1509, + "step": 4563 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017208546976681138, + "loss": 3.1893, + "step": 4564 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017188837253069228, + "loss": 3.1064, + "step": 4565 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017169136480014947, + "loss": 3.0647, + "step": 4566 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017149444662892472, + "loss": 3.2034, + "step": 4567 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001712976180707353, + "loss": 3.1724, + "step": 4568 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017110087917927407, + "loss": 2.9871, + "step": 4569 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017090423000820965, + "loss": 3.1078, + "step": 4570 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001707076706111856, + "loss": 3.1918, + "step": 4571 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001705112010418215, + "loss": 3.1108, + "step": 4572 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017031482135371252, + "loss": 3.251, + "step": 4573 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017011853160042855, + "loss": 3.3603, + "step": 4574 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016992233183551596, + "loss": 3.0229, + "step": 4575 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001697262221124961, + "loss": 3.0078, + "step": 4576 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016953020248486545, + "loss": 3.1344, + "step": 4577 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016933427300609626, + "loss": 3.0572, + "step": 4578 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016913843372963645, + "loss": 3.1619, + "step": 4579 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016894268470890844, + "loss": 3.0956, + "step": 4580 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016874702599731106, + "loss": 3.0275, + "step": 4581 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016855145764821817, + "loss": 3.1206, + "step": 4582 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016835597971497847, + "loss": 3.1665, + "step": 4583 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001681605922509165, + "loss": 3.2499, + "step": 4584 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001679652953093321, + "loss": 3.2056, + "step": 4585 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016777008894350027, + "loss": 3.1612, + "step": 4586 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016757497320667137, + "loss": 3.1377, + "step": 4587 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016737994815207126, + "loss": 3.0569, + "step": 4588 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016718501383290035, + "loss": 3.0067, + "step": 4589 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016699017030233509, + "loss": 3.0986, + "step": 4590 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016679541761352673, + "loss": 3.1474, + "step": 4591 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016660075581960198, + "loss": 3.0609, + "step": 4592 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016640618497366255, + "loss": 3.1537, + "step": 4593 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016621170512878559, + "loss": 3.1354, + "step": 4594 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001660173163380229, + "loss": 3.0581, + "step": 4595 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016582301865440213, + "loss": 3.3011, + "step": 4596 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016562881213092557, + "loss": 3.0772, + "step": 4597 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016543469682057105, + "loss": 3.1538, + "step": 4598 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001652406727762908, + "loss": 3.0836, + "step": 4599 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001650467400510131, + "loss": 3.0759, + "step": 4600 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001648528986976409, + "loss": 3.1682, + "step": 4601 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016465914876905192, + "loss": 3.1532, + "step": 4602 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001644654903180992, + "loss": 3.1793, + "step": 4603 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016427192339761115, + "loss": 3.1389, + "step": 4604 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016407844806039034, + "loss": 3.2438, + "step": 4605 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016388506435921542, + "loss": 3.1604, + "step": 4606 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016369177234683952, + "loss": 3.2712, + "step": 4607 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016349857207599046, + "loss": 3.1298, + "step": 4608 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016330546359937144, + "loss": 3.0876, + "step": 4609 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016311244696966075, + "loss": 3.1551, + "step": 4610 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016291952223951094, + "loss": 3.056, + "step": 4611 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016272668946155038, + "loss": 3.0544, + "step": 4612 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016253394868838195, + "loss": 3.0717, + "step": 4613 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016234129997258308, + "loss": 3.0896, + "step": 4614 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001621487433667066, + "loss": 3.0597, + "step": 4615 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016195627892328014, + "loss": 3.1656, + "step": 4616 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016176390669480568, + "loss": 3.0709, + "step": 4617 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001615716267337608, + "loss": 3.0979, + "step": 4618 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016137943909259766, + "loss": 3.1539, + "step": 4619 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016118734382374278, + "loss": 3.1911, + "step": 4620 + }, + { + "epoch": 0.75, + "learning_rate": 0.000160995340979598, + "loss": 3.1721, + "step": 4621 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016080343061253993, + "loss": 3.2664, + "step": 4622 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001606116127749194, + "loss": 3.1529, + "step": 4623 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001604198875190626, + "loss": 3.0861, + "step": 4624 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016022825489727055, + "loss": 2.9629, + "step": 4625 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016003671496181833, + "loss": 3.1157, + "step": 4626 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015984526776495627, + "loss": 3.2161, + "step": 4627 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001596539133589094, + "loss": 3.1533, + "step": 4628 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015946265179587698, + "loss": 3.0633, + "step": 4629 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015927148312803324, + "loss": 3.0741, + "step": 4630 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001590804074075276, + "loss": 3.2475, + "step": 4631 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015888942468648309, + "loss": 3.0978, + "step": 4632 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001586985350169981, + "loss": 3.2043, + "step": 4633 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015850773845114534, + "loss": 3.178, + "step": 4634 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001583170350409725, + "loss": 3.0867, + "step": 4635 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015812642483850094, + "loss": 3.1799, + "step": 4636 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015793590789572792, + "loss": 2.9196, + "step": 4637 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015774548426462416, + "loss": 3.3066, + "step": 4638 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015755515399713533, + "loss": 3.2982, + "step": 4639 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015736491714518175, + "loss": 3.1342, + "step": 4640 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015717477376065824, + "loss": 3.2029, + "step": 4641 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001569847238954336, + "loss": 3.1513, + "step": 4642 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015679476760135209, + "loss": 3.3632, + "step": 4643 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015660490493023144, + "loss": 3.0427, + "step": 4644 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015641513593386448, + "loss": 3.0676, + "step": 4645 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015622546066401826, + "loss": 3.1386, + "step": 4646 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001560358791724345, + "loss": 3.2193, + "step": 4647 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015584639151082873, + "loss": 3.126, + "step": 4648 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001556569977308916, + "loss": 3.0454, + "step": 4649 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015546769788428772, + "loss": 3.0527, + "step": 4650 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015527849202265625, + "loss": 3.1509, + "step": 4651 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015508938019761066, + "loss": 3.0953, + "step": 4652 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001549003624607389, + "loss": 3.111, + "step": 4653 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015471143886360277, + "loss": 3.1105, + "step": 4654 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001545226094577389, + "loss": 2.9829, + "step": 4655 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015433387429465833, + "loss": 3.1464, + "step": 4656 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001541452334258458, + "loss": 3.3385, + "step": 4657 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015395668690276076, + "loss": 3.0799, + "step": 4658 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015376823477683698, + "loss": 3.0852, + "step": 4659 + }, + { + "epoch": 0.75, + "learning_rate": 0.000153579877099482, + "loss": 3.1752, + "step": 4660 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015339161392207795, + "loss": 3.116, + "step": 4661 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015320344529598162, + "loss": 3.1615, + "step": 4662 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015301537127252295, + "loss": 3.2399, + "step": 4663 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015282739190300697, + "loss": 3.2792, + "step": 4664 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001526395072387126, + "loss": 3.1532, + "step": 4665 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001524517173308927, + "loss": 3.0397, + "step": 4666 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001522640222307744, + "loss": 3.0568, + "step": 4667 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015207642198955957, + "loss": 3.1239, + "step": 4668 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015188891665842314, + "loss": 3.0319, + "step": 4669 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015170150628851493, + "loss": 3.0856, + "step": 4670 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015151419093095876, + "loss": 3.1884, + "step": 4671 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015132697063685203, + "loss": 3.0688, + "step": 4672 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015113984545726673, + "loss": 3.2576, + "step": 4673 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001509528154432488, + "loss": 3.242, + "step": 4674 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001507658806458181, + "loss": 3.2179, + "step": 4675 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001505790411159686, + "loss": 3.2161, + "step": 4676 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015039229690466845, + "loss": 3.2918, + "step": 4677 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015020564806285925, + "loss": 3.0003, + "step": 4678 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015001909464145707, + "loss": 3.082, + "step": 4679 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014983263669135187, + "loss": 3.3381, + "step": 4680 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014964627426340755, + "loss": 3.1087, + "step": 4681 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014946000740846183, + "loss": 3.2561, + "step": 4682 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014927383617732666, + "loss": 3.3689, + "step": 4683 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014908776062078733, + "loss": 3.2829, + "step": 4684 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014890178078960366, + "loss": 3.2179, + "step": 4685 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014871589673450897, + "loss": 3.1368, + "step": 4686 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014853010850621062, + "loss": 3.1453, + "step": 4687 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014834441615538984, + "loss": 3.0794, + "step": 4688 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014815881973270157, + "loss": 3.2073, + "step": 4689 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014797331928877484, + "loss": 3.072, + "step": 4690 + }, + { + "epoch": 0.76, + "learning_rate": 0.000147787914874212, + "loss": 3.0164, + "step": 4691 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014760260653958973, + "loss": 3.0295, + "step": 4692 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014741739433545825, + "loss": 3.1035, + "step": 4693 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014723227831234159, + "loss": 3.2157, + "step": 4694 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001470472585207376, + "loss": 3.1863, + "step": 4695 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001468623350111179, + "loss": 3.125, + "step": 4696 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014667750783392753, + "loss": 3.0151, + "step": 4697 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001464927770395857, + "loss": 3.2317, + "step": 4698 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014630814267848502, + "loss": 3.179, + "step": 4699 + }, + { + "epoch": 0.76, + "learning_rate": 0.000146123604800992, + "loss": 3.1452, + "step": 4700 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014593916345744667, + "loss": 3.1667, + "step": 4701 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014575481869816292, + "loss": 3.1468, + "step": 4702 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014557057057342792, + "loss": 3.0361, + "step": 4703 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014538641913350287, + "loss": 3.0457, + "step": 4704 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014520236442862238, + "loss": 3.218, + "step": 4705 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014501840650899477, + "loss": 3.2558, + "step": 4706 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001448345454248019, + "loss": 3.2224, + "step": 4707 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014465078122619945, + "loss": 3.0929, + "step": 4708 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014446711396331614, + "loss": 3.2538, + "step": 4709 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014428354368625467, + "loss": 3.0254, + "step": 4710 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014410007044509123, + "loss": 3.0677, + "step": 4711 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001439166942898754, + "loss": 3.1195, + "step": 4712 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001437334152706305, + "loss": 3.243, + "step": 4713 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001435502334373533, + "loss": 3.1226, + "step": 4714 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014336714884001362, + "loss": 3.1822, + "step": 4715 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014318416152855535, + "loss": 3.2075, + "step": 4716 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014300127155289572, + "loss": 3.0089, + "step": 4717 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014281847896292484, + "loss": 3.3334, + "step": 4718 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014263578380850706, + "loss": 3.0239, + "step": 4719 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014245318613947998, + "loss": 3.0748, + "step": 4720 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014227068600565395, + "loss": 3.1174, + "step": 4721 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014208828345681345, + "loss": 3.019, + "step": 4722 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014190597854271602, + "loss": 3.2834, + "step": 4723 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014172377131309267, + "loss": 3.1406, + "step": 4724 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001415416618176476, + "loss": 3.1112, + "step": 4725 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014135965010605878, + "loss": 3.1568, + "step": 4726 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001411777362279767, + "loss": 3.0292, + "step": 4727 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014099592023302594, + "loss": 3.1026, + "step": 4728 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014081420217080403, + "loss": 3.1332, + "step": 4729 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014063258209088186, + "loss": 3.119, + "step": 4730 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014045106004280362, + "loss": 3.0669, + "step": 4731 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001402696360760868, + "loss": 3.2539, + "step": 4732 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014008831024022166, + "loss": 3.3096, + "step": 4733 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013990708258467243, + "loss": 3.2309, + "step": 4734 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001397259531588761, + "loss": 3.0476, + "step": 4735 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013954492201224289, + "loss": 3.0703, + "step": 4736 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013936398919415645, + "loss": 3.0459, + "step": 4737 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013918315475397336, + "loss": 3.1504, + "step": 4738 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001390024187410236, + "loss": 3.1693, + "step": 4739 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001388217812046098, + "loss": 3.2382, + "step": 4740 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013864124219400837, + "loss": 3.1005, + "step": 4741 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001384608017584686, + "loss": 3.0691, + "step": 4742 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013828045994721238, + "loss": 3.193, + "step": 4743 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013810021680943563, + "loss": 3.2702, + "step": 4744 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013792007239430687, + "loss": 3.0813, + "step": 4745 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013774002675096743, + "loss": 3.175, + "step": 4746 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013756007992853209, + "loss": 3.1572, + "step": 4747 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013738023197608866, + "loss": 3.1296, + "step": 4748 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001372004829426975, + "loss": 3.2493, + "step": 4749 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001370208328773927, + "loss": 3.227, + "step": 4750 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001368412818291811, + "loss": 3.2034, + "step": 4751 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013666182984704207, + "loss": 3.1294, + "step": 4752 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013648247697992848, + "loss": 3.1629, + "step": 4753 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001363032232767662, + "loss": 3.1215, + "step": 4754 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013612406878645334, + "loss": 3.1865, + "step": 4755 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013594501355786197, + "loss": 3.1684, + "step": 4756 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001357660576398365, + "loss": 3.3268, + "step": 4757 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013558720108119404, + "loss": 3.1857, + "step": 4758 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013540844393072505, + "loss": 3.1692, + "step": 4759 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013522978623719279, + "loss": 3.1543, + "step": 4760 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001350512280493329, + "loss": 3.2336, + "step": 4761 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001348727694158547, + "loss": 3.1581, + "step": 4762 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013469441038543994, + "loss": 3.1, + "step": 4763 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013451615100674286, + "loss": 3.195, + "step": 4764 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013433799132839098, + "loss": 3.0023, + "step": 4765 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001341599313989847, + "loss": 3.1391, + "step": 4766 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001339819712670966, + "loss": 3.1696, + "step": 4767 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013380411098127244, + "loss": 2.9665, + "step": 4768 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013362635059003126, + "loss": 3.1726, + "step": 4769 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013344869014186378, + "loss": 3.1854, + "step": 4770 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001332711296852342, + "loss": 3.0604, + "step": 4771 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013309366926857923, + "loss": 3.1276, + "step": 4772 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001329163089403085, + "loss": 3.1862, + "step": 4773 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013273904874880354, + "loss": 3.0583, + "step": 4774 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013256188874241986, + "loss": 3.2679, + "step": 4775 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001323848289694845, + "loss": 3.1481, + "step": 4776 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013220786947829778, + "loss": 3.1598, + "step": 4777 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013203101031713239, + "loss": 3.2316, + "step": 4778 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013185425153423391, + "loss": 3.264, + "step": 4779 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013167759317782003, + "loss": 3.1103, + "step": 4780 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013150103529608183, + "loss": 3.0893, + "step": 4781 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013132457793718218, + "loss": 3.1284, + "step": 4782 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013114822114925701, + "loss": 3.2, + "step": 4783 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013097196498041474, + "loss": 3.1236, + "step": 4784 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013079580947873638, + "loss": 3.1939, + "step": 4785 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013061975469227493, + "loss": 3.1774, + "step": 4786 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001304438006690571, + "loss": 3.1344, + "step": 4787 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013026794745708077, + "loss": 3.1989, + "step": 4788 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001300921951043172, + "loss": 3.1928, + "step": 4789 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012991654365870987, + "loss": 3.1758, + "step": 4790 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012974099316817483, + "loss": 3.2121, + "step": 4791 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001295655436806003, + "loss": 3.1831, + "step": 4792 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012939019524384703, + "loss": 3.1475, + "step": 4793 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001292149479057489, + "loss": 3.1872, + "step": 4794 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012903980171411107, + "loss": 3.3097, + "step": 4795 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012886475671671188, + "loss": 3.1364, + "step": 4796 + }, + { + "epoch": 0.77, + "learning_rate": 0.000128689812961302, + "loss": 3.1821, + "step": 4797 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012851497049560406, + "loss": 3.2509, + "step": 4798 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012834022936731332, + "loss": 3.0814, + "step": 4799 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012816558962409785, + "loss": 3.1439, + "step": 4800 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012799105131359719, + "loss": 3.2606, + "step": 4801 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001278166144834238, + "loss": 3.1709, + "step": 4802 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012764227918116245, + "loss": 3.215, + "step": 4803 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001274680454543698, + "loss": 3.1228, + "step": 4804 + }, + { + "epoch": 0.77, + "learning_rate": 0.000127293913350575, + "loss": 3.2092, + "step": 4805 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001271198829172801, + "loss": 3.1427, + "step": 4806 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012694595420195836, + "loss": 3.0412, + "step": 4807 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012677212725205594, + "loss": 3.0979, + "step": 4808 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012659840211499124, + "loss": 3.1395, + "step": 4809 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012642477883815446, + "loss": 3.2791, + "step": 4810 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001262512574689083, + "loss": 3.0985, + "step": 4811 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012607783805458812, + "loss": 3.1743, + "step": 4812 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012590452064250053, + "loss": 3.1178, + "step": 4813 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012573130527992492, + "loss": 3.2321, + "step": 4814 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001255581920141129, + "loss": 2.9761, + "step": 4815 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012538518089228774, + "loss": 3.1456, + "step": 4816 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001252122719616453, + "loss": 3.2656, + "step": 4817 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001250394652693534, + "loss": 3.2394, + "step": 4818 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012486676086255195, + "loss": 3.1443, + "step": 4819 + }, + { + "epoch": 0.78, + "learning_rate": 0.000124694158788353, + "loss": 3.0552, + "step": 4820 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001245216590938409, + "loss": 3.1, + "step": 4821 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012434926182607143, + "loss": 3.0593, + "step": 4822 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012417696703207303, + "loss": 3.1442, + "step": 4823 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012400477475884604, + "loss": 3.1415, + "step": 4824 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012383268505336277, + "loss": 3.1526, + "step": 4825 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012366069796256756, + "loss": 3.1983, + "step": 4826 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001234888135333768, + "loss": 3.1195, + "step": 4827 + }, + { + "epoch": 0.78, + "learning_rate": 0.000123317031812679, + "loss": 3.0281, + "step": 4828 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012314535284733413, + "loss": 3.1642, + "step": 4829 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012297377668417476, + "loss": 3.2705, + "step": 4830 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012280230337000504, + "loss": 3.2031, + "step": 4831 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001226309329516012, + "loss": 3.2556, + "step": 4832 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012245966547571146, + "loss": 3.203, + "step": 4833 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012228850098905598, + "loss": 3.052, + "step": 4834 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012211743953832637, + "loss": 3.1725, + "step": 4835 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012194648117018676, + "loss": 3.173, + "step": 4836 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012177562593127272, + "loss": 3.1276, + "step": 4837 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012160487386819202, + "loss": 3.0708, + "step": 4838 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012143422502752399, + "loss": 3.1303, + "step": 4839 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012126367945582017, + "loss": 3.1881, + "step": 4840 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012109323719960336, + "loss": 3.0731, + "step": 4841 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012092289830536857, + "loss": 3.144, + "step": 4842 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012075266281958269, + "loss": 3.2424, + "step": 4843 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001205825307886842, + "loss": 3.3722, + "step": 4844 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001204125022590834, + "loss": 3.1274, + "step": 4845 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012024257727716253, + "loss": 3.1859, + "step": 4846 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012007275588927519, + "loss": 3.228, + "step": 4847 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011990303814174702, + "loss": 3.2129, + "step": 4848 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001197334240808754, + "loss": 3.084, + "step": 4849 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001195639137529293, + "loss": 3.1734, + "step": 4850 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011939450720414946, + "loss": 3.2095, + "step": 4851 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011922520448074841, + "loss": 3.0224, + "step": 4852 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011905600562891006, + "loss": 3.1346, + "step": 4853 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011888691069479018, + "loss": 3.0641, + "step": 4854 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011871791972451628, + "loss": 3.0481, + "step": 4855 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011854903276418738, + "loss": 3.1751, + "step": 4856 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011838024985987411, + "loss": 3.0715, + "step": 4857 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011821157105761898, + "loss": 3.0544, + "step": 4858 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011804299640343551, + "loss": 3.1181, + "step": 4859 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011787452594330944, + "loss": 3.4344, + "step": 4860 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011770615972319771, + "loss": 3.0966, + "step": 4861 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011753789778902902, + "loss": 3.2174, + "step": 4862 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001173697401867035, + "loss": 3.1807, + "step": 4863 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011720168696209305, + "loss": 3.0867, + "step": 4864 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011703373816104062, + "loss": 3.3051, + "step": 4865 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011686589382936108, + "loss": 3.0291, + "step": 4866 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011669815401284073, + "loss": 3.2071, + "step": 4867 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011653051875723725, + "loss": 3.0775, + "step": 4868 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011636298810827994, + "loss": 3.2901, + "step": 4869 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001161955621116696, + "loss": 3.1684, + "step": 4870 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011602824081307805, + "loss": 3.0837, + "step": 4871 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001158610242581491, + "loss": 2.9655, + "step": 4872 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011569391249249766, + "loss": 3.1339, + "step": 4873 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011552690556171019, + "loss": 3.1537, + "step": 4874 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011536000351134457, + "loss": 3.3432, + "step": 4875 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011519320638692993, + "loss": 3.1176, + "step": 4876 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011502651423396698, + "loss": 3.2177, + "step": 4877 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011485992709792753, + "loss": 3.1806, + "step": 4878 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011469344502425488, + "loss": 3.1027, + "step": 4879 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011452706805836376, + "loss": 3.0263, + "step": 4880 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011436079624564011, + "loss": 3.0376, + "step": 4881 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011419462963144129, + "loss": 3.2135, + "step": 4882 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011402856826109592, + "loss": 3.2435, + "step": 4883 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011386261217990368, + "loss": 3.1854, + "step": 4884 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001136967614331359, + "loss": 3.3745, + "step": 4885 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011353101606603489, + "loss": 3.1521, + "step": 4886 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011336537612381448, + "loss": 2.9812, + "step": 4887 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011319984165165947, + "loss": 3.1545, + "step": 4888 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001130344126947262, + "loss": 3.121, + "step": 4889 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011286908929814176, + "loss": 3.2406, + "step": 4890 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011270387150700489, + "loss": 3.1395, + "step": 4891 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011253875936638542, + "loss": 3.1566, + "step": 4892 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011237375292132395, + "loss": 3.1297, + "step": 4893 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011220885221683297, + "loss": 3.0869, + "step": 4894 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011204405729789574, + "loss": 3.2534, + "step": 4895 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011187936820946642, + "loss": 3.2705, + "step": 4896 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011171478499647064, + "loss": 3.0887, + "step": 4897 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011155030770380525, + "loss": 3.0535, + "step": 4898 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001113859363763376, + "loss": 3.0782, + "step": 4899 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001112216710589069, + "loss": 3.0684, + "step": 4900 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011105751179632318, + "loss": 3.0915, + "step": 4901 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011089345863336703, + "loss": 3.1393, + "step": 4902 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011072951161479083, + "loss": 3.0918, + "step": 4903 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011056567078531771, + "loss": 3.1304, + "step": 4904 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001104019361896414, + "loss": 3.1763, + "step": 4905 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001102383078724275, + "loss": 3.2028, + "step": 4906 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011007478587831222, + "loss": 3.2097, + "step": 4907 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010991137025190239, + "loss": 3.0088, + "step": 4908 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010974806103777635, + "loss": 3.0237, + "step": 4909 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001095848582804832, + "loss": 3.1475, + "step": 4910 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010942176202454302, + "loss": 3.2141, + "step": 4911 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010925877231444687, + "loss": 3.0692, + "step": 4912 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001090958891946568, + "loss": 3.2114, + "step": 4913 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010893311270960542, + "loss": 3.2398, + "step": 4914 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001087704429036967, + "loss": 3.1053, + "step": 4915 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010860787982130538, + "loss": 3.217, + "step": 4916 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001084454235067771, + "loss": 3.0064, + "step": 4917 + }, + { + "epoch": 0.79, + "learning_rate": 0.000108283074004428, + "loss": 3.1373, + "step": 4918 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010812083135854589, + "loss": 3.1779, + "step": 4919 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001079586956133885, + "loss": 3.0326, + "step": 4920 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010779666681318507, + "loss": 3.078, + "step": 4921 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010763474500213539, + "loss": 3.0738, + "step": 4922 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010747293022441024, + "loss": 3.1753, + "step": 4923 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010731122252415065, + "loss": 3.1369, + "step": 4924 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001071496219454695, + "loss": 3.1852, + "step": 4925 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010698812853244932, + "loss": 3.0958, + "step": 4926 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010682674232914407, + "loss": 3.0674, + "step": 4927 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010666546337957828, + "loss": 3.1281, + "step": 4928 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001065042917277474, + "loss": 3.2644, + "step": 4929 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010634322741761699, + "loss": 3.0315, + "step": 4930 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010618227049312423, + "loss": 3.2259, + "step": 4931 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010602142099817658, + "loss": 3.0497, + "step": 4932 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010586067897665186, + "loss": 3.1686, + "step": 4933 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010570004447239906, + "loss": 3.0573, + "step": 4934 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010553951752923779, + "loss": 3.1807, + "step": 4935 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010537909819095781, + "loss": 3.3023, + "step": 4936 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010521878650132027, + "loss": 3.1906, + "step": 4937 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010505858250405664, + "loss": 3.2826, + "step": 4938 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010489848624286868, + "loss": 3.0047, + "step": 4939 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010473849776142924, + "loss": 3.2077, + "step": 4940 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010457861710338167, + "loss": 3.1718, + "step": 4941 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001044188443123395, + "loss": 3.1703, + "step": 4942 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010425917943188728, + "loss": 3.1818, + "step": 4943 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010409962250558025, + "loss": 3.0449, + "step": 4944 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010394017357694368, + "loss": 3.0185, + "step": 4945 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010378083268947369, + "loss": 3.1774, + "step": 4946 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010362159988663699, + "loss": 3.0863, + "step": 4947 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010346247521187058, + "loss": 3.0478, + "step": 4948 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010330345870858194, + "loss": 2.9725, + "step": 4949 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010314455042014964, + "loss": 3.0298, + "step": 4950 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010298575038992186, + "loss": 3.1567, + "step": 4951 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010282705866121778, + "loss": 3.0787, + "step": 4952 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010266847527732714, + "loss": 3.2465, + "step": 4953 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010251000028150954, + "loss": 3.0574, + "step": 4954 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010235163371699541, + "loss": 3.1112, + "step": 4955 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010219337562698594, + "loss": 3.1573, + "step": 4956 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010203522605465204, + "loss": 3.094, + "step": 4957 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010187718504313537, + "loss": 3.1952, + "step": 4958 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010171925263554815, + "loss": 3.1114, + "step": 4959 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010156142887497244, + "loss": 3.099, + "step": 4960 + }, + { + "epoch": 0.8, + "learning_rate": 0.000101403713804461, + "loss": 3.211, + "step": 4961 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010124610746703738, + "loss": 3.1278, + "step": 4962 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010108860990569452, + "loss": 3.1937, + "step": 4963 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010093122116339642, + "loss": 3.2154, + "step": 4964 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010077394128307704, + "loss": 3.1238, + "step": 4965 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010061677030764105, + "loss": 3.1657, + "step": 4966 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001004597082799627, + "loss": 3.0789, + "step": 4967 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010030275524288718, + "loss": 3.1792, + "step": 4968 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010014591123922962, + "loss": 3.237, + "step": 4969 + }, + { + "epoch": 0.8, + "learning_rate": 9.998917631177557e-05, + "loss": 3.006, + "step": 4970 + }, + { + "epoch": 0.8, + "learning_rate": 9.983255050328077e-05, + "loss": 3.1297, + "step": 4971 + }, + { + "epoch": 0.8, + "learning_rate": 9.967603385647128e-05, + "loss": 3.1067, + "step": 4972 + }, + { + "epoch": 0.8, + "learning_rate": 9.9519626414043e-05, + "loss": 3.2081, + "step": 4973 + }, + { + "epoch": 0.8, + "learning_rate": 9.936332821866239e-05, + "loss": 3.2674, + "step": 4974 + }, + { + "epoch": 0.8, + "learning_rate": 9.92071393129662e-05, + "loss": 3.0694, + "step": 4975 + }, + { + "epoch": 0.8, + "learning_rate": 9.905105973956107e-05, + "loss": 3.2677, + "step": 4976 + }, + { + "epoch": 0.8, + "learning_rate": 9.8895089541024e-05, + "loss": 3.1751, + "step": 4977 + }, + { + "epoch": 0.8, + "learning_rate": 9.87392287599022e-05, + "loss": 3.0518, + "step": 4978 + }, + { + "epoch": 0.8, + "learning_rate": 9.858347743871277e-05, + "loss": 3.1192, + "step": 4979 + }, + { + "epoch": 0.8, + "learning_rate": 9.842783561994295e-05, + "loss": 3.2888, + "step": 4980 + }, + { + "epoch": 0.8, + "learning_rate": 9.827230334605086e-05, + "loss": 3.1125, + "step": 4981 + }, + { + "epoch": 0.8, + "learning_rate": 9.811688065946361e-05, + "loss": 3.3754, + "step": 4982 + }, + { + "epoch": 0.8, + "learning_rate": 9.796156760257912e-05, + "loss": 3.1284, + "step": 4983 + }, + { + "epoch": 0.8, + "learning_rate": 9.78063642177654e-05, + "loss": 3.0273, + "step": 4984 + }, + { + "epoch": 0.8, + "learning_rate": 9.765127054736011e-05, + "loss": 3.1209, + "step": 4985 + }, + { + "epoch": 0.8, + "learning_rate": 9.749628663367127e-05, + "loss": 2.9797, + "step": 4986 + }, + { + "epoch": 0.8, + "learning_rate": 9.734141251897733e-05, + "loss": 3.3267, + "step": 4987 + }, + { + "epoch": 0.8, + "learning_rate": 9.718664824552604e-05, + "loss": 3.1869, + "step": 4988 + }, + { + "epoch": 0.8, + "learning_rate": 9.703199385553558e-05, + "loss": 3.1266, + "step": 4989 + }, + { + "epoch": 0.8, + "learning_rate": 9.68774493911943e-05, + "loss": 3.0127, + "step": 4990 + }, + { + "epoch": 0.8, + "learning_rate": 9.672301489466023e-05, + "loss": 3.1857, + "step": 4991 + }, + { + "epoch": 0.8, + "learning_rate": 9.656869040806155e-05, + "loss": 3.2729, + "step": 4992 + }, + { + "epoch": 0.81, + "learning_rate": 9.641447597349651e-05, + "loss": 3.2776, + "step": 4993 + }, + { + "epoch": 0.81, + "learning_rate": 9.626037163303319e-05, + "loss": 3.0817, + "step": 4994 + }, + { + "epoch": 0.81, + "learning_rate": 9.610637742870987e-05, + "loss": 3.0875, + "step": 4995 + }, + { + "epoch": 0.81, + "learning_rate": 9.595249340253459e-05, + "loss": 3.168, + "step": 4996 + }, + { + "epoch": 0.81, + "learning_rate": 9.579871959648523e-05, + "loss": 2.8711, + "step": 4997 + }, + { + "epoch": 0.81, + "learning_rate": 9.564505605250984e-05, + "loss": 2.9325, + "step": 4998 + }, + { + "epoch": 0.81, + "learning_rate": 9.549150281252633e-05, + "loss": 3.0769, + "step": 4999 + }, + { + "epoch": 0.81, + "learning_rate": 9.533805991842242e-05, + "loss": 3.1756, + "step": 5000 + }, + { + "epoch": 0.81, + "learning_rate": 9.518472741205586e-05, + "loss": 3.1727, + "step": 5001 + }, + { + "epoch": 0.81, + "learning_rate": 9.503150533525435e-05, + "loss": 3.1319, + "step": 5002 + }, + { + "epoch": 0.81, + "learning_rate": 9.487839372981505e-05, + "loss": 3.1683, + "step": 5003 + }, + { + "epoch": 0.81, + "learning_rate": 9.472539263750546e-05, + "loss": 3.1614, + "step": 5004 + }, + { + "epoch": 0.81, + "learning_rate": 9.457250210006274e-05, + "loss": 3.0727, + "step": 5005 + }, + { + "epoch": 0.81, + "learning_rate": 9.441972215919387e-05, + "loss": 3.1181, + "step": 5006 + }, + { + "epoch": 0.81, + "learning_rate": 9.426705285657567e-05, + "loss": 3.1921, + "step": 5007 + }, + { + "epoch": 0.81, + "learning_rate": 9.411449423385499e-05, + "loss": 3.108, + "step": 5008 + }, + { + "epoch": 0.81, + "learning_rate": 9.396204633264798e-05, + "loss": 3.0903, + "step": 5009 + }, + { + "epoch": 0.81, + "learning_rate": 9.380970919454113e-05, + "loss": 3.0797, + "step": 5010 + }, + { + "epoch": 0.81, + "learning_rate": 9.365748286109044e-05, + "loss": 3.0541, + "step": 5011 + }, + { + "epoch": 0.81, + "learning_rate": 9.35053673738217e-05, + "loss": 3.1747, + "step": 5012 + }, + { + "epoch": 0.81, + "learning_rate": 9.335336277423051e-05, + "loss": 3.0877, + "step": 5013 + }, + { + "epoch": 0.81, + "learning_rate": 9.320146910378224e-05, + "loss": 3.2224, + "step": 5014 + }, + { + "epoch": 0.81, + "learning_rate": 9.30496864039121e-05, + "loss": 3.2575, + "step": 5015 + }, + { + "epoch": 0.81, + "learning_rate": 9.289801471602455e-05, + "loss": 3.0143, + "step": 5016 + }, + { + "epoch": 0.81, + "learning_rate": 9.274645408149435e-05, + "loss": 3.1051, + "step": 5017 + }, + { + "epoch": 0.81, + "learning_rate": 9.25950045416657e-05, + "loss": 3.1506, + "step": 5018 + }, + { + "epoch": 0.81, + "learning_rate": 9.244366613785249e-05, + "loss": 3.1688, + "step": 5019 + }, + { + "epoch": 0.81, + "learning_rate": 9.229243891133832e-05, + "loss": 3.0318, + "step": 5020 + }, + { + "epoch": 0.81, + "learning_rate": 9.214132290337663e-05, + "loss": 3.1099, + "step": 5021 + }, + { + "epoch": 0.81, + "learning_rate": 9.19903181551901e-05, + "loss": 3.1499, + "step": 5022 + }, + { + "epoch": 0.81, + "learning_rate": 9.183942470797141e-05, + "loss": 3.1946, + "step": 5023 + }, + { + "epoch": 0.81, + "learning_rate": 9.168864260288285e-05, + "loss": 3.2483, + "step": 5024 + }, + { + "epoch": 0.81, + "learning_rate": 9.153797188105623e-05, + "loss": 3.1258, + "step": 5025 + }, + { + "epoch": 0.81, + "learning_rate": 9.138741258359295e-05, + "loss": 2.9609, + "step": 5026 + }, + { + "epoch": 0.81, + "learning_rate": 9.123696475156434e-05, + "loss": 2.9939, + "step": 5027 + }, + { + "epoch": 0.81, + "learning_rate": 9.108662842601079e-05, + "loss": 3.0845, + "step": 5028 + }, + { + "epoch": 0.81, + "learning_rate": 9.093640364794258e-05, + "loss": 3.0058, + "step": 5029 + }, + { + "epoch": 0.81, + "learning_rate": 9.078629045833964e-05, + "loss": 3.0417, + "step": 5030 + }, + { + "epoch": 0.81, + "learning_rate": 9.063628889815128e-05, + "loss": 3.0204, + "step": 5031 + }, + { + "epoch": 0.81, + "learning_rate": 9.048639900829642e-05, + "loss": 3.0949, + "step": 5032 + }, + { + "epoch": 0.81, + "learning_rate": 9.03366208296637e-05, + "loss": 3.0728, + "step": 5033 + }, + { + "epoch": 0.81, + "learning_rate": 9.018695440311087e-05, + "loss": 3.1579, + "step": 5034 + }, + { + "epoch": 0.81, + "learning_rate": 9.003739976946552e-05, + "loss": 3.1137, + "step": 5035 + }, + { + "epoch": 0.81, + "learning_rate": 8.988795696952462e-05, + "loss": 3.0077, + "step": 5036 + }, + { + "epoch": 0.81, + "learning_rate": 8.97386260440548e-05, + "loss": 3.1329, + "step": 5037 + }, + { + "epoch": 0.81, + "learning_rate": 8.958940703379192e-05, + "loss": 3.3283, + "step": 5038 + }, + { + "epoch": 0.81, + "learning_rate": 8.944029997944165e-05, + "loss": 3.1693, + "step": 5039 + }, + { + "epoch": 0.81, + "learning_rate": 8.929130492167864e-05, + "loss": 3.1059, + "step": 5040 + }, + { + "epoch": 0.81, + "learning_rate": 8.91424219011473e-05, + "loss": 3.0857, + "step": 5041 + }, + { + "epoch": 0.81, + "learning_rate": 8.899365095846167e-05, + "loss": 3.2368, + "step": 5042 + }, + { + "epoch": 0.81, + "learning_rate": 8.88449921342045e-05, + "loss": 3.0072, + "step": 5043 + }, + { + "epoch": 0.81, + "learning_rate": 8.869644546892891e-05, + "loss": 3.053, + "step": 5044 + }, + { + "epoch": 0.81, + "learning_rate": 8.854801100315685e-05, + "loss": 3.1801, + "step": 5045 + }, + { + "epoch": 0.81, + "learning_rate": 8.839968877737958e-05, + "loss": 3.201, + "step": 5046 + }, + { + "epoch": 0.81, + "learning_rate": 8.825147883205804e-05, + "loss": 3.121, + "step": 5047 + }, + { + "epoch": 0.81, + "learning_rate": 8.810338120762235e-05, + "loss": 3.1162, + "step": 5048 + }, + { + "epoch": 0.81, + "learning_rate": 8.795539594447216e-05, + "loss": 3.1618, + "step": 5049 + }, + { + "epoch": 0.81, + "learning_rate": 8.780752308297629e-05, + "loss": 3.1336, + "step": 5050 + }, + { + "epoch": 0.81, + "learning_rate": 8.76597626634732e-05, + "loss": 3.0342, + "step": 5051 + }, + { + "epoch": 0.81, + "learning_rate": 8.751211472627014e-05, + "loss": 3.1021, + "step": 5052 + }, + { + "epoch": 0.81, + "learning_rate": 8.736457931164416e-05, + "loss": 3.2496, + "step": 5053 + }, + { + "epoch": 0.81, + "learning_rate": 8.721715645984135e-05, + "loss": 2.9835, + "step": 5054 + }, + { + "epoch": 0.82, + "learning_rate": 8.706984621107733e-05, + "loss": 3.2831, + "step": 5055 + }, + { + "epoch": 0.82, + "learning_rate": 8.692264860553673e-05, + "loss": 3.249, + "step": 5056 + }, + { + "epoch": 0.82, + "learning_rate": 8.677556368337386e-05, + "loss": 2.9502, + "step": 5057 + }, + { + "epoch": 0.82, + "learning_rate": 8.662859148471164e-05, + "loss": 3.0645, + "step": 5058 + }, + { + "epoch": 0.82, + "learning_rate": 8.648173204964277e-05, + "loss": 3.3553, + "step": 5059 + }, + { + "epoch": 0.82, + "learning_rate": 8.633498541822909e-05, + "loss": 3.0614, + "step": 5060 + }, + { + "epoch": 0.82, + "learning_rate": 8.61883516305016e-05, + "loss": 2.975, + "step": 5061 + }, + { + "epoch": 0.82, + "learning_rate": 8.604183072646055e-05, + "loss": 3.1529, + "step": 5062 + }, + { + "epoch": 0.82, + "learning_rate": 8.589542274607543e-05, + "loss": 3.2732, + "step": 5063 + }, + { + "epoch": 0.82, + "learning_rate": 8.574912772928461e-05, + "loss": 3.1847, + "step": 5064 + }, + { + "epoch": 0.82, + "learning_rate": 8.56029457159962e-05, + "loss": 3.0164, + "step": 5065 + }, + { + "epoch": 0.82, + "learning_rate": 8.545687674608704e-05, + "loss": 3.0532, + "step": 5066 + }, + { + "epoch": 0.82, + "learning_rate": 8.531092085940345e-05, + "loss": 3.1011, + "step": 5067 + }, + { + "epoch": 0.82, + "learning_rate": 8.516507809576041e-05, + "loss": 3.01, + "step": 5068 + }, + { + "epoch": 0.82, + "learning_rate": 8.50193484949427e-05, + "loss": 3.2182, + "step": 5069 + }, + { + "epoch": 0.82, + "learning_rate": 8.487373209670391e-05, + "loss": 3.0786, + "step": 5070 + }, + { + "epoch": 0.82, + "learning_rate": 8.472822894076643e-05, + "loss": 3.1203, + "step": 5071 + }, + { + "epoch": 0.82, + "learning_rate": 8.458283906682229e-05, + "loss": 3.1015, + "step": 5072 + }, + { + "epoch": 0.82, + "learning_rate": 8.443756251453249e-05, + "loss": 3.2932, + "step": 5073 + }, + { + "epoch": 0.82, + "learning_rate": 8.429239932352666e-05, + "loss": 3.1192, + "step": 5074 + }, + { + "epoch": 0.82, + "learning_rate": 8.414734953340419e-05, + "loss": 3.2206, + "step": 5075 + }, + { + "epoch": 0.82, + "learning_rate": 8.400241318373331e-05, + "loss": 3.1854, + "step": 5076 + }, + { + "epoch": 0.82, + "learning_rate": 8.385759031405082e-05, + "loss": 3.3214, + "step": 5077 + }, + { + "epoch": 0.82, + "learning_rate": 8.371288096386321e-05, + "loss": 3.2587, + "step": 5078 + }, + { + "epoch": 0.82, + "learning_rate": 8.35682851726458e-05, + "loss": 3.1495, + "step": 5079 + }, + { + "epoch": 0.82, + "learning_rate": 8.342380297984253e-05, + "loss": 3.0329, + "step": 5080 + }, + { + "epoch": 0.82, + "learning_rate": 8.327943442486708e-05, + "loss": 3.114, + "step": 5081 + }, + { + "epoch": 0.82, + "learning_rate": 8.31351795471017e-05, + "loss": 3.0755, + "step": 5082 + }, + { + "epoch": 0.82, + "learning_rate": 8.29910383858975e-05, + "loss": 3.2667, + "step": 5083 + }, + { + "epoch": 0.82, + "learning_rate": 8.284701098057485e-05, + "loss": 3.1781, + "step": 5084 + }, + { + "epoch": 0.82, + "learning_rate": 8.270309737042308e-05, + "loss": 3.3113, + "step": 5085 + }, + { + "epoch": 0.82, + "learning_rate": 8.255929759470004e-05, + "loss": 3.1095, + "step": 5086 + }, + { + "epoch": 0.82, + "learning_rate": 8.241561169263329e-05, + "loss": 3.2347, + "step": 5087 + }, + { + "epoch": 0.82, + "learning_rate": 8.227203970341879e-05, + "loss": 3.0496, + "step": 5088 + }, + { + "epoch": 0.82, + "learning_rate": 8.21285816662214e-05, + "loss": 3.0478, + "step": 5089 + }, + { + "epoch": 0.82, + "learning_rate": 8.198523762017512e-05, + "loss": 3.002, + "step": 5090 + }, + { + "epoch": 0.82, + "learning_rate": 8.184200760438298e-05, + "loss": 3.2932, + "step": 5091 + }, + { + "epoch": 0.82, + "learning_rate": 8.169889165791633e-05, + "loss": 3.1013, + "step": 5092 + }, + { + "epoch": 0.82, + "learning_rate": 8.155588981981583e-05, + "loss": 3.1271, + "step": 5093 + }, + { + "epoch": 0.82, + "learning_rate": 8.141300212909131e-05, + "loss": 3.2487, + "step": 5094 + }, + { + "epoch": 0.82, + "learning_rate": 8.127022862472077e-05, + "loss": 3.1907, + "step": 5095 + }, + { + "epoch": 0.82, + "learning_rate": 8.112756934565146e-05, + "loss": 2.9575, + "step": 5096 + }, + { + "epoch": 0.82, + "learning_rate": 8.098502433079963e-05, + "loss": 3.2369, + "step": 5097 + }, + { + "epoch": 0.82, + "learning_rate": 8.084259361904977e-05, + "loss": 3.0907, + "step": 5098 + }, + { + "epoch": 0.82, + "learning_rate": 8.070027724925565e-05, + "loss": 3.0067, + "step": 5099 + }, + { + "epoch": 0.82, + "learning_rate": 8.055807526024005e-05, + "loss": 3.1305, + "step": 5100 + }, + { + "epoch": 0.82, + "learning_rate": 8.041598769079395e-05, + "loss": 3.1364, + "step": 5101 + }, + { + "epoch": 0.82, + "learning_rate": 8.027401457967748e-05, + "loss": 3.2659, + "step": 5102 + }, + { + "epoch": 0.82, + "learning_rate": 8.013215596561957e-05, + "loss": 3.1394, + "step": 5103 + }, + { + "epoch": 0.82, + "learning_rate": 7.999041188731787e-05, + "loss": 3.1791, + "step": 5104 + }, + { + "epoch": 0.82, + "learning_rate": 7.984878238343846e-05, + "loss": 3.0557, + "step": 5105 + }, + { + "epoch": 0.82, + "learning_rate": 7.970726749261687e-05, + "loss": 2.9985, + "step": 5106 + }, + { + "epoch": 0.82, + "learning_rate": 7.95658672534566e-05, + "loss": 3.1085, + "step": 5107 + }, + { + "epoch": 0.82, + "learning_rate": 7.942458170453043e-05, + "loss": 3.0313, + "step": 5108 + }, + { + "epoch": 0.82, + "learning_rate": 7.928341088437952e-05, + "loss": 3.0882, + "step": 5109 + }, + { + "epoch": 0.82, + "learning_rate": 7.914235483151405e-05, + "loss": 3.1335, + "step": 5110 + }, + { + "epoch": 0.82, + "learning_rate": 7.900141358441232e-05, + "loss": 3.1116, + "step": 5111 + }, + { + "epoch": 0.82, + "learning_rate": 7.886058718152222e-05, + "loss": 3.0612, + "step": 5112 + }, + { + "epoch": 0.82, + "learning_rate": 7.871987566125938e-05, + "loss": 3.1548, + "step": 5113 + }, + { + "epoch": 0.82, + "learning_rate": 7.857927906200863e-05, + "loss": 3.0652, + "step": 5114 + }, + { + "epoch": 0.82, + "learning_rate": 7.843879742212334e-05, + "loss": 3.3375, + "step": 5115 + }, + { + "epoch": 0.82, + "learning_rate": 7.82984307799256e-05, + "loss": 3.1351, + "step": 5116 + }, + { + "epoch": 0.83, + "learning_rate": 7.815817917370577e-05, + "loss": 3.4055, + "step": 5117 + }, + { + "epoch": 0.83, + "learning_rate": 7.801804264172313e-05, + "loss": 3.0711, + "step": 5118 + }, + { + "epoch": 0.83, + "learning_rate": 7.787802122220583e-05, + "loss": 2.9377, + "step": 5119 + }, + { + "epoch": 0.83, + "learning_rate": 7.773811495334999e-05, + "loss": 3.3156, + "step": 5120 + }, + { + "epoch": 0.83, + "learning_rate": 7.759832387332078e-05, + "loss": 3.1588, + "step": 5121 + }, + { + "epoch": 0.83, + "learning_rate": 7.745864802025194e-05, + "loss": 3.1414, + "step": 5122 + }, + { + "epoch": 0.83, + "learning_rate": 7.73190874322453e-05, + "loss": 3.1297, + "step": 5123 + }, + { + "epoch": 0.83, + "learning_rate": 7.717964214737178e-05, + "loss": 3.1544, + "step": 5124 + }, + { + "epoch": 0.83, + "learning_rate": 7.704031220367086e-05, + "loss": 3.1058, + "step": 5125 + }, + { + "epoch": 0.83, + "learning_rate": 7.690109763915004e-05, + "loss": 3.2174, + "step": 5126 + }, + { + "epoch": 0.83, + "learning_rate": 7.676199849178583e-05, + "loss": 3.2697, + "step": 5127 + }, + { + "epoch": 0.83, + "learning_rate": 7.662301479952317e-05, + "loss": 3.0165, + "step": 5128 + }, + { + "epoch": 0.83, + "learning_rate": 7.64841466002752e-05, + "loss": 3.0727, + "step": 5129 + }, + { + "epoch": 0.83, + "learning_rate": 7.63453939319238e-05, + "loss": 3.1139, + "step": 5130 + }, + { + "epoch": 0.83, + "learning_rate": 7.620675683231959e-05, + "loss": 3.0211, + "step": 5131 + }, + { + "epoch": 0.83, + "learning_rate": 7.60682353392811e-05, + "loss": 3.2531, + "step": 5132 + }, + { + "epoch": 0.83, + "learning_rate": 7.592982949059567e-05, + "loss": 3.1231, + "step": 5133 + }, + { + "epoch": 0.83, + "learning_rate": 7.579153932401928e-05, + "loss": 3.2348, + "step": 5134 + }, + { + "epoch": 0.83, + "learning_rate": 7.56533648772757e-05, + "loss": 3.1262, + "step": 5135 + }, + { + "epoch": 0.83, + "learning_rate": 7.551530618805768e-05, + "loss": 3.1831, + "step": 5136 + }, + { + "epoch": 0.83, + "learning_rate": 7.53773632940265e-05, + "loss": 3.192, + "step": 5137 + }, + { + "epoch": 0.83, + "learning_rate": 7.523953623281132e-05, + "loss": 3.1427, + "step": 5138 + }, + { + "epoch": 0.83, + "learning_rate": 7.510182504201013e-05, + "loss": 3.112, + "step": 5139 + }, + { + "epoch": 0.83, + "learning_rate": 7.496422975918915e-05, + "loss": 3.0814, + "step": 5140 + }, + { + "epoch": 0.83, + "learning_rate": 7.482675042188292e-05, + "loss": 3.0157, + "step": 5141 + }, + { + "epoch": 0.83, + "learning_rate": 7.468938706759448e-05, + "loss": 3.1684, + "step": 5142 + }, + { + "epoch": 0.83, + "learning_rate": 7.455213973379516e-05, + "loss": 2.9667, + "step": 5143 + }, + { + "epoch": 0.83, + "learning_rate": 7.441500845792471e-05, + "loss": 3.2422, + "step": 5144 + }, + { + "epoch": 0.83, + "learning_rate": 7.42779932773911e-05, + "loss": 2.9819, + "step": 5145 + }, + { + "epoch": 0.83, + "learning_rate": 7.414109422957088e-05, + "loss": 3.1231, + "step": 5146 + }, + { + "epoch": 0.83, + "learning_rate": 7.400431135180852e-05, + "loss": 3.0518, + "step": 5147 + }, + { + "epoch": 0.83, + "learning_rate": 7.38676446814171e-05, + "loss": 3.061, + "step": 5148 + }, + { + "epoch": 0.83, + "learning_rate": 7.373109425567797e-05, + "loss": 3.0037, + "step": 5149 + }, + { + "epoch": 0.83, + "learning_rate": 7.359466011184068e-05, + "loss": 3.1321, + "step": 5150 + }, + { + "epoch": 0.83, + "learning_rate": 7.345834228712311e-05, + "loss": 3.102, + "step": 5151 + }, + { + "epoch": 0.83, + "learning_rate": 7.332214081871141e-05, + "loss": 3.0638, + "step": 5152 + }, + { + "epoch": 0.83, + "learning_rate": 7.318605574376014e-05, + "loss": 3.0269, + "step": 5153 + }, + { + "epoch": 0.83, + "learning_rate": 7.30500870993916e-05, + "loss": 3.1972, + "step": 5154 + }, + { + "epoch": 0.83, + "learning_rate": 7.291423492269694e-05, + "loss": 3.1706, + "step": 5155 + }, + { + "epoch": 0.83, + "learning_rate": 7.277849925073515e-05, + "loss": 3.2558, + "step": 5156 + }, + { + "epoch": 0.83, + "learning_rate": 7.264288012053366e-05, + "loss": 3.2528, + "step": 5157 + }, + { + "epoch": 0.83, + "learning_rate": 7.250737756908794e-05, + "loss": 3.181, + "step": 5158 + }, + { + "epoch": 0.83, + "learning_rate": 7.237199163336189e-05, + "loss": 3.0513, + "step": 5159 + }, + { + "epoch": 0.83, + "learning_rate": 7.223672235028727e-05, + "loss": 3.2261, + "step": 5160 + }, + { + "epoch": 0.83, + "learning_rate": 7.210156975676418e-05, + "loss": 3.0933, + "step": 5161 + }, + { + "epoch": 0.83, + "learning_rate": 7.196653388966095e-05, + "loss": 3.195, + "step": 5162 + }, + { + "epoch": 0.83, + "learning_rate": 7.183161478581406e-05, + "loss": 3.2378, + "step": 5163 + }, + { + "epoch": 0.83, + "learning_rate": 7.169681248202808e-05, + "loss": 3.1512, + "step": 5164 + }, + { + "epoch": 0.83, + "learning_rate": 7.15621270150758e-05, + "loss": 3.1431, + "step": 5165 + }, + { + "epoch": 0.83, + "learning_rate": 7.142755842169785e-05, + "loss": 3.2168, + "step": 5166 + }, + { + "epoch": 0.83, + "learning_rate": 7.129310673860334e-05, + "loss": 2.9561, + "step": 5167 + }, + { + "epoch": 0.83, + "learning_rate": 7.11587720024694e-05, + "loss": 3.1838, + "step": 5168 + }, + { + "epoch": 0.83, + "learning_rate": 7.102455424994108e-05, + "loss": 3.1856, + "step": 5169 + }, + { + "epoch": 0.83, + "learning_rate": 7.089045351763174e-05, + "loss": 3.112, + "step": 5170 + }, + { + "epoch": 0.83, + "learning_rate": 7.075646984212275e-05, + "loss": 3.0103, + "step": 5171 + }, + { + "epoch": 0.83, + "learning_rate": 7.062260325996339e-05, + "loss": 3.0474, + "step": 5172 + }, + { + "epoch": 0.83, + "learning_rate": 7.048885380767123e-05, + "loss": 3.2045, + "step": 5173 + }, + { + "epoch": 0.83, + "learning_rate": 7.035522152173168e-05, + "loss": 3.1105, + "step": 5174 + }, + { + "epoch": 0.83, + "learning_rate": 7.022170643859838e-05, + "loss": 3.2056, + "step": 5175 + }, + { + "epoch": 0.83, + "learning_rate": 7.008830859469296e-05, + "loss": 3.1943, + "step": 5176 + }, + { + "epoch": 0.83, + "learning_rate": 6.995502802640497e-05, + "loss": 3.2604, + "step": 5177 + }, + { + "epoch": 0.83, + "learning_rate": 6.982186477009194e-05, + "loss": 3.2211, + "step": 5178 + }, + { + "epoch": 0.84, + "learning_rate": 6.968881886207956e-05, + "loss": 3.1681, + "step": 5179 + }, + { + "epoch": 0.84, + "learning_rate": 6.955589033866139e-05, + "loss": 3.0491, + "step": 5180 + }, + { + "epoch": 0.84, + "learning_rate": 6.942307923609904e-05, + "loss": 3.2136, + "step": 5181 + }, + { + "epoch": 0.84, + "learning_rate": 6.929038559062201e-05, + "loss": 3.2473, + "step": 5182 + }, + { + "epoch": 0.84, + "learning_rate": 6.915780943842792e-05, + "loss": 3.1467, + "step": 5183 + }, + { + "epoch": 0.84, + "learning_rate": 6.9025350815682e-05, + "loss": 3.1693, + "step": 5184 + }, + { + "epoch": 0.84, + "learning_rate": 6.889300975851781e-05, + "loss": 3.158, + "step": 5185 + }, + { + "epoch": 0.84, + "learning_rate": 6.876078630303661e-05, + "loss": 3.3385, + "step": 5186 + }, + { + "epoch": 0.84, + "learning_rate": 6.862868048530768e-05, + "loss": 3.1976, + "step": 5187 + }, + { + "epoch": 0.84, + "learning_rate": 6.849669234136813e-05, + "loss": 3.1504, + "step": 5188 + }, + { + "epoch": 0.84, + "learning_rate": 6.836482190722309e-05, + "loss": 2.9868, + "step": 5189 + }, + { + "epoch": 0.84, + "learning_rate": 6.823306921884537e-05, + "loss": 3.0692, + "step": 5190 + }, + { + "epoch": 0.84, + "learning_rate": 6.810143431217585e-05, + "loss": 3.1256, + "step": 5191 + }, + { + "epoch": 0.84, + "learning_rate": 6.796991722312318e-05, + "loss": 3.2791, + "step": 5192 + }, + { + "epoch": 0.84, + "learning_rate": 6.783851798756396e-05, + "loss": 3.0654, + "step": 5193 + }, + { + "epoch": 0.84, + "learning_rate": 6.770723664134254e-05, + "loss": 3.1216, + "step": 5194 + }, + { + "epoch": 0.84, + "learning_rate": 6.757607322027132e-05, + "loss": 3.2263, + "step": 5195 + }, + { + "epoch": 0.84, + "learning_rate": 6.744502776013018e-05, + "loss": 3.2164, + "step": 5196 + }, + { + "epoch": 0.84, + "learning_rate": 6.731410029666701e-05, + "loss": 3.2069, + "step": 5197 + }, + { + "epoch": 0.84, + "learning_rate": 6.71832908655976e-05, + "loss": 3.1535, + "step": 5198 + }, + { + "epoch": 0.84, + "learning_rate": 6.70525995026055e-05, + "loss": 3.1078, + "step": 5199 + }, + { + "epoch": 0.84, + "learning_rate": 6.692202624334187e-05, + "loss": 3.1529, + "step": 5200 + }, + { + "epoch": 0.84, + "learning_rate": 6.679157112342604e-05, + "loss": 3.1202, + "step": 5201 + }, + { + "epoch": 0.84, + "learning_rate": 6.666123417844456e-05, + "loss": 3.0805, + "step": 5202 + }, + { + "epoch": 0.84, + "learning_rate": 6.653101544395218e-05, + "loss": 3.2423, + "step": 5203 + }, + { + "epoch": 0.84, + "learning_rate": 6.640091495547129e-05, + "loss": 3.0891, + "step": 5204 + }, + { + "epoch": 0.84, + "learning_rate": 6.627093274849194e-05, + "loss": 3.1721, + "step": 5205 + }, + { + "epoch": 0.84, + "learning_rate": 6.61410688584721e-05, + "loss": 3.1488, + "step": 5206 + }, + { + "epoch": 0.84, + "learning_rate": 6.60113233208372e-05, + "loss": 2.9562, + "step": 5207 + }, + { + "epoch": 0.84, + "learning_rate": 6.588169617098071e-05, + "loss": 3.169, + "step": 5208 + }, + { + "epoch": 0.84, + "learning_rate": 6.575218744426348e-05, + "loss": 3.1151, + "step": 5209 + }, + { + "epoch": 0.84, + "learning_rate": 6.562279717601415e-05, + "loss": 3.0651, + "step": 5210 + }, + { + "epoch": 0.84, + "learning_rate": 6.54935254015292e-05, + "loss": 3.168, + "step": 5211 + }, + { + "epoch": 0.84, + "learning_rate": 6.536437215607261e-05, + "loss": 3.1644, + "step": 5212 + }, + { + "epoch": 0.84, + "learning_rate": 6.52353374748762e-05, + "loss": 3.2229, + "step": 5213 + }, + { + "epoch": 0.84, + "learning_rate": 6.510642139313933e-05, + "loss": 3.3354, + "step": 5214 + }, + { + "epoch": 0.84, + "learning_rate": 6.49776239460289e-05, + "loss": 2.9036, + "step": 5215 + }, + { + "epoch": 0.84, + "learning_rate": 6.484894516867962e-05, + "loss": 3.2851, + "step": 5216 + }, + { + "epoch": 0.84, + "learning_rate": 6.472038509619388e-05, + "loss": 3.3044, + "step": 5217 + }, + { + "epoch": 0.84, + "learning_rate": 6.45919437636413e-05, + "loss": 3.1882, + "step": 5218 + }, + { + "epoch": 0.84, + "learning_rate": 6.446362120605969e-05, + "loss": 3.0779, + "step": 5219 + }, + { + "epoch": 0.84, + "learning_rate": 6.433541745845417e-05, + "loss": 3.2151, + "step": 5220 + }, + { + "epoch": 0.84, + "learning_rate": 6.42073325557972e-05, + "loss": 3.1287, + "step": 5221 + }, + { + "epoch": 0.84, + "learning_rate": 6.407936653302926e-05, + "loss": 3.0623, + "step": 5222 + }, + { + "epoch": 0.84, + "learning_rate": 6.395151942505822e-05, + "loss": 3.2403, + "step": 5223 + }, + { + "epoch": 0.84, + "learning_rate": 6.382379126675919e-05, + "loss": 3.1908, + "step": 5224 + }, + { + "epoch": 0.84, + "learning_rate": 6.369618209297546e-05, + "loss": 3.1351, + "step": 5225 + }, + { + "epoch": 0.84, + "learning_rate": 6.356869193851755e-05, + "loss": 3.2411, + "step": 5226 + }, + { + "epoch": 0.84, + "learning_rate": 6.344132083816328e-05, + "loss": 3.1305, + "step": 5227 + }, + { + "epoch": 0.84, + "learning_rate": 6.331406882665836e-05, + "loss": 3.0891, + "step": 5228 + }, + { + "epoch": 0.84, + "learning_rate": 6.318693593871593e-05, + "loss": 3.2478, + "step": 5229 + }, + { + "epoch": 0.84, + "learning_rate": 6.305992220901624e-05, + "loss": 3.1533, + "step": 5230 + }, + { + "epoch": 0.84, + "learning_rate": 6.293302767220771e-05, + "loss": 3.1216, + "step": 5231 + }, + { + "epoch": 0.84, + "learning_rate": 6.280625236290593e-05, + "loss": 3.1764, + "step": 5232 + }, + { + "epoch": 0.84, + "learning_rate": 6.26795963156937e-05, + "loss": 3.2167, + "step": 5233 + }, + { + "epoch": 0.84, + "learning_rate": 6.255305956512159e-05, + "loss": 3.0672, + "step": 5234 + }, + { + "epoch": 0.84, + "learning_rate": 6.242664214570776e-05, + "loss": 3.258, + "step": 5235 + }, + { + "epoch": 0.84, + "learning_rate": 6.230034409193724e-05, + "loss": 3.2261, + "step": 5236 + }, + { + "epoch": 0.84, + "learning_rate": 6.21741654382632e-05, + "loss": 3.2774, + "step": 5237 + }, + { + "epoch": 0.84, + "learning_rate": 6.20481062191059e-05, + "loss": 3.0319, + "step": 5238 + }, + { + "epoch": 0.84, + "learning_rate": 6.19221664688529e-05, + "loss": 3.3377, + "step": 5239 + }, + { + "epoch": 0.84, + "learning_rate": 6.179634622185932e-05, + "loss": 3.2308, + "step": 5240 + }, + { + "epoch": 0.85, + "learning_rate": 6.167064551244772e-05, + "loss": 3.0471, + "step": 5241 + }, + { + "epoch": 0.85, + "learning_rate": 6.15450643749081e-05, + "loss": 3.0981, + "step": 5242 + }, + { + "epoch": 0.85, + "learning_rate": 6.14196028434974e-05, + "loss": 3.2595, + "step": 5243 + }, + { + "epoch": 0.85, + "learning_rate": 6.129426095244073e-05, + "loss": 3.0728, + "step": 5244 + }, + { + "epoch": 0.85, + "learning_rate": 6.116903873592977e-05, + "loss": 3.2367, + "step": 5245 + }, + { + "epoch": 0.85, + "learning_rate": 6.104393622812399e-05, + "loss": 3.112, + "step": 5246 + }, + { + "epoch": 0.85, + "learning_rate": 6.091895346315013e-05, + "loss": 2.9946, + "step": 5247 + }, + { + "epoch": 0.85, + "learning_rate": 6.079409047510231e-05, + "loss": 3.1833, + "step": 5248 + }, + { + "epoch": 0.85, + "learning_rate": 6.066934729804158e-05, + "loss": 3.0505, + "step": 5249 + }, + { + "epoch": 0.85, + "learning_rate": 6.054472396599714e-05, + "loss": 3.0997, + "step": 5250 + }, + { + "epoch": 0.85, + "learning_rate": 6.0420220512964585e-05, + "loss": 3.132, + "step": 5251 + }, + { + "epoch": 0.85, + "learning_rate": 6.029583697290736e-05, + "loss": 3.0132, + "step": 5252 + }, + { + "epoch": 0.85, + "learning_rate": 6.017157337975609e-05, + "loss": 3.1636, + "step": 5253 + }, + { + "epoch": 0.85, + "learning_rate": 6.004742976740868e-05, + "loss": 3.1478, + "step": 5254 + }, + { + "epoch": 0.85, + "learning_rate": 5.9923406169729966e-05, + "loss": 3.0455, + "step": 5255 + }, + { + "epoch": 0.85, + "learning_rate": 5.979950262055267e-05, + "loss": 3.077, + "step": 5256 + }, + { + "epoch": 0.85, + "learning_rate": 5.967571915367642e-05, + "loss": 3.056, + "step": 5257 + }, + { + "epoch": 0.85, + "learning_rate": 5.955205580286799e-05, + "loss": 3.1825, + "step": 5258 + }, + { + "epoch": 0.85, + "learning_rate": 5.94285126018615e-05, + "loss": 3.2461, + "step": 5259 + }, + { + "epoch": 0.85, + "learning_rate": 5.930508958435848e-05, + "loss": 3.0501, + "step": 5260 + }, + { + "epoch": 0.85, + "learning_rate": 5.918178678402714e-05, + "loss": 3.1652, + "step": 5261 + }, + { + "epoch": 0.85, + "learning_rate": 5.90586042345036e-05, + "loss": 3.0504, + "step": 5262 + }, + { + "epoch": 0.85, + "learning_rate": 5.8935541969390825e-05, + "loss": 3.1661, + "step": 5263 + }, + { + "epoch": 0.85, + "learning_rate": 5.8812600022258745e-05, + "loss": 3.2866, + "step": 5264 + }, + { + "epoch": 0.85, + "learning_rate": 5.8689778426644805e-05, + "loss": 3.1115, + "step": 5265 + }, + { + "epoch": 0.85, + "learning_rate": 5.856707721605359e-05, + "loss": 3.0553, + "step": 5266 + }, + { + "epoch": 0.85, + "learning_rate": 5.844449642395666e-05, + "loss": 3.1275, + "step": 5267 + }, + { + "epoch": 0.85, + "learning_rate": 5.832203608379272e-05, + "loss": 3.0551, + "step": 5268 + }, + { + "epoch": 0.85, + "learning_rate": 5.8199696228968036e-05, + "loss": 3.1618, + "step": 5269 + }, + { + "epoch": 0.85, + "learning_rate": 5.807747689285547e-05, + "loss": 3.1199, + "step": 5270 + }, + { + "epoch": 0.85, + "learning_rate": 5.795537810879531e-05, + "loss": 3.149, + "step": 5271 + }, + { + "epoch": 0.85, + "learning_rate": 5.7833399910094955e-05, + "loss": 3.3431, + "step": 5272 + }, + { + "epoch": 0.85, + "learning_rate": 5.771154233002862e-05, + "loss": 3.1143, + "step": 5273 + }, + { + "epoch": 0.85, + "learning_rate": 5.7589805401837894e-05, + "loss": 3.1932, + "step": 5274 + }, + { + "epoch": 0.85, + "learning_rate": 5.74681891587317e-05, + "loss": 3.2348, + "step": 5275 + }, + { + "epoch": 0.85, + "learning_rate": 5.7346693633885446e-05, + "loss": 3.1473, + "step": 5276 + }, + { + "epoch": 0.85, + "learning_rate": 5.722531886044191e-05, + "loss": 3.0744, + "step": 5277 + }, + { + "epoch": 0.85, + "learning_rate": 5.71040648715111e-05, + "loss": 3.1193, + "step": 5278 + }, + { + "epoch": 0.85, + "learning_rate": 5.698293170016966e-05, + "loss": 3.1755, + "step": 5279 + }, + { + "epoch": 0.85, + "learning_rate": 5.68619193794615e-05, + "loss": 3.1354, + "step": 5280 + }, + { + "epoch": 0.85, + "learning_rate": 5.6741027942397885e-05, + "loss": 2.9966, + "step": 5281 + }, + { + "epoch": 0.85, + "learning_rate": 5.662025742195654e-05, + "loss": 3.2396, + "step": 5282 + }, + { + "epoch": 0.85, + "learning_rate": 5.649960785108244e-05, + "loss": 3.199, + "step": 5283 + }, + { + "epoch": 0.85, + "learning_rate": 5.6379079262687813e-05, + "loss": 3.0118, + "step": 5284 + }, + { + "epoch": 0.85, + "learning_rate": 5.625867168965138e-05, + "loss": 3.1244, + "step": 5285 + }, + { + "epoch": 0.85, + "learning_rate": 5.613838516481912e-05, + "loss": 3.0405, + "step": 5286 + }, + { + "epoch": 0.85, + "learning_rate": 5.601821972100435e-05, + "loss": 3.1956, + "step": 5287 + }, + { + "epoch": 0.85, + "learning_rate": 5.5898175390986686e-05, + "loss": 3.171, + "step": 5288 + }, + { + "epoch": 0.85, + "learning_rate": 5.5778252207513094e-05, + "loss": 3.063, + "step": 5289 + }, + { + "epoch": 0.85, + "learning_rate": 5.565845020329741e-05, + "loss": 3.0836, + "step": 5290 + }, + { + "epoch": 0.85, + "learning_rate": 5.5538769411020596e-05, + "loss": 3.0852, + "step": 5291 + }, + { + "epoch": 0.85, + "learning_rate": 5.54192098633301e-05, + "loss": 3.1848, + "step": 5292 + }, + { + "epoch": 0.85, + "learning_rate": 5.529977159284072e-05, + "loss": 2.9598, + "step": 5293 + }, + { + "epoch": 0.85, + "learning_rate": 5.5180454632134006e-05, + "loss": 2.9875, + "step": 5294 + }, + { + "epoch": 0.85, + "learning_rate": 5.506125901375847e-05, + "loss": 3.0159, + "step": 5295 + }, + { + "epoch": 0.85, + "learning_rate": 5.494218477022939e-05, + "loss": 3.1757, + "step": 5296 + }, + { + "epoch": 0.85, + "learning_rate": 5.482323193402921e-05, + "loss": 3.2564, + "step": 5297 + }, + { + "epoch": 0.85, + "learning_rate": 5.470440053760689e-05, + "loss": 3.0884, + "step": 5298 + }, + { + "epoch": 0.85, + "learning_rate": 5.458569061337854e-05, + "loss": 3.1643, + "step": 5299 + }, + { + "epoch": 0.85, + "learning_rate": 5.446710219372697e-05, + "loss": 3.0618, + "step": 5300 + }, + { + "epoch": 0.85, + "learning_rate": 5.434863531100198e-05, + "loss": 3.161, + "step": 5301 + }, + { + "epoch": 0.85, + "learning_rate": 5.4230289997520166e-05, + "loss": 3.1078, + "step": 5302 + }, + { + "epoch": 0.86, + "learning_rate": 5.4112066285564975e-05, + "loss": 3.2769, + "step": 5303 + }, + { + "epoch": 0.86, + "learning_rate": 5.399396420738656e-05, + "loss": 2.9889, + "step": 5304 + }, + { + "epoch": 0.86, + "learning_rate": 5.387598379520203e-05, + "loss": 3.0165, + "step": 5305 + }, + { + "epoch": 0.86, + "learning_rate": 5.375812508119521e-05, + "loss": 3.0288, + "step": 5306 + }, + { + "epoch": 0.86, + "learning_rate": 5.3640388097516866e-05, + "loss": 3.3053, + "step": 5307 + }, + { + "epoch": 0.86, + "learning_rate": 5.352277287628449e-05, + "loss": 3.0723, + "step": 5308 + }, + { + "epoch": 0.86, + "learning_rate": 5.3405279449582345e-05, + "loss": 3.1045, + "step": 5309 + }, + { + "epoch": 0.86, + "learning_rate": 5.3287907849461304e-05, + "loss": 3.1271, + "step": 5310 + }, + { + "epoch": 0.86, + "learning_rate": 5.317065810793931e-05, + "loss": 3.0603, + "step": 5311 + }, + { + "epoch": 0.86, + "learning_rate": 5.3053530257000826e-05, + "loss": 3.0677, + "step": 5312 + }, + { + "epoch": 0.86, + "learning_rate": 5.29365243285973e-05, + "loss": 3.2249, + "step": 5313 + }, + { + "epoch": 0.86, + "learning_rate": 5.2819640354646645e-05, + "loss": 3.0821, + "step": 5314 + }, + { + "epoch": 0.86, + "learning_rate": 5.27028783670338e-05, + "loss": 3.0314, + "step": 5315 + }, + { + "epoch": 0.86, + "learning_rate": 5.258623839761012e-05, + "loss": 3.1329, + "step": 5316 + }, + { + "epoch": 0.86, + "learning_rate": 5.246972047819387e-05, + "loss": 3.1326, + "step": 5317 + }, + { + "epoch": 0.86, + "learning_rate": 5.2353324640569965e-05, + "loss": 3.2606, + "step": 5318 + }, + { + "epoch": 0.86, + "learning_rate": 5.2237050916490006e-05, + "loss": 3.2651, + "step": 5319 + }, + { + "epoch": 0.86, + "learning_rate": 5.212089933767239e-05, + "loss": 3.0767, + "step": 5320 + }, + { + "epoch": 0.86, + "learning_rate": 5.200486993580211e-05, + "loss": 3.1629, + "step": 5321 + }, + { + "epoch": 0.86, + "learning_rate": 5.1888962742530745e-05, + "loss": 3.282, + "step": 5322 + }, + { + "epoch": 0.86, + "learning_rate": 5.17731777894766e-05, + "loss": 3.1028, + "step": 5323 + }, + { + "epoch": 0.86, + "learning_rate": 5.1657515108224694e-05, + "loss": 3.2119, + "step": 5324 + }, + { + "epoch": 0.86, + "learning_rate": 5.154197473032668e-05, + "loss": 3.1275, + "step": 5325 + }, + { + "epoch": 0.86, + "learning_rate": 5.142655668730084e-05, + "loss": 3.0484, + "step": 5326 + }, + { + "epoch": 0.86, + "learning_rate": 5.1311261010632104e-05, + "loss": 3.2182, + "step": 5327 + }, + { + "epoch": 0.86, + "learning_rate": 5.1196087731771815e-05, + "loss": 3.191, + "step": 5328 + }, + { + "epoch": 0.86, + "learning_rate": 5.1081036882138166e-05, + "loss": 3.0079, + "step": 5329 + }, + { + "epoch": 0.86, + "learning_rate": 5.096610849311589e-05, + "loss": 3.0101, + "step": 5330 + }, + { + "epoch": 0.86, + "learning_rate": 5.08513025960563e-05, + "loss": 3.0847, + "step": 5331 + }, + { + "epoch": 0.86, + "learning_rate": 5.0736619222277346e-05, + "loss": 3.03, + "step": 5332 + }, + { + "epoch": 0.86, + "learning_rate": 5.062205840306355e-05, + "loss": 3.146, + "step": 5333 + }, + { + "epoch": 0.86, + "learning_rate": 5.0507620169665814e-05, + "loss": 2.9684, + "step": 5334 + }, + { + "epoch": 0.86, + "learning_rate": 5.0393304553301825e-05, + "loss": 3.0812, + "step": 5335 + }, + { + "epoch": 0.86, + "learning_rate": 5.027911158515569e-05, + "loss": 3.2237, + "step": 5336 + }, + { + "epoch": 0.86, + "learning_rate": 5.016504129637817e-05, + "loss": 3.1383, + "step": 5337 + }, + { + "epoch": 0.86, + "learning_rate": 5.005109371808647e-05, + "loss": 3.1091, + "step": 5338 + }, + { + "epoch": 0.86, + "learning_rate": 4.993726888136446e-05, + "loss": 3.024, + "step": 5339 + }, + { + "epoch": 0.86, + "learning_rate": 4.982356681726219e-05, + "loss": 3.2634, + "step": 5340 + }, + { + "epoch": 0.86, + "learning_rate": 4.970998755679662e-05, + "loss": 3.178, + "step": 5341 + }, + { + "epoch": 0.86, + "learning_rate": 4.9596531130951026e-05, + "loss": 3.2132, + "step": 5342 + }, + { + "epoch": 0.86, + "learning_rate": 4.94831975706751e-05, + "loss": 3.197, + "step": 5343 + }, + { + "epoch": 0.86, + "learning_rate": 4.936998690688521e-05, + "loss": 3.0834, + "step": 5344 + }, + { + "epoch": 0.86, + "learning_rate": 4.9256899170464056e-05, + "loss": 3.1816, + "step": 5345 + }, + { + "epoch": 0.86, + "learning_rate": 4.9143934392260946e-05, + "loss": 3.169, + "step": 5346 + }, + { + "epoch": 0.86, + "learning_rate": 4.90310926030913e-05, + "loss": 3.209, + "step": 5347 + }, + { + "epoch": 0.86, + "learning_rate": 4.891837383373737e-05, + "loss": 3.0332, + "step": 5348 + }, + { + "epoch": 0.86, + "learning_rate": 4.8805778114947744e-05, + "loss": 3.1495, + "step": 5349 + }, + { + "epoch": 0.86, + "learning_rate": 4.8693305477437335e-05, + "loss": 3.179, + "step": 5350 + }, + { + "epoch": 0.86, + "learning_rate": 4.858095595188766e-05, + "loss": 3.1582, + "step": 5351 + }, + { + "epoch": 0.86, + "learning_rate": 4.846872956894649e-05, + "loss": 3.2267, + "step": 5352 + }, + { + "epoch": 0.86, + "learning_rate": 4.835662635922805e-05, + "loss": 3.2132, + "step": 5353 + }, + { + "epoch": 0.86, + "learning_rate": 4.824464635331294e-05, + "loss": 3.1774, + "step": 5354 + }, + { + "epoch": 0.86, + "learning_rate": 4.8132789581748216e-05, + "loss": 3.0654, + "step": 5355 + }, + { + "epoch": 0.86, + "learning_rate": 4.802105607504731e-05, + "loss": 2.9699, + "step": 5356 + }, + { + "epoch": 0.86, + "learning_rate": 4.790944586369e-05, + "loss": 3.309, + "step": 5357 + }, + { + "epoch": 0.86, + "learning_rate": 4.7797958978122555e-05, + "loss": 3.1813, + "step": 5358 + }, + { + "epoch": 0.86, + "learning_rate": 4.7686595448757254e-05, + "loss": 3.1829, + "step": 5359 + }, + { + "epoch": 0.86, + "learning_rate": 4.757535530597307e-05, + "loss": 3.2379, + "step": 5360 + }, + { + "epoch": 0.86, + "learning_rate": 4.746423858011534e-05, + "loss": 3.1527, + "step": 5361 + }, + { + "epoch": 0.86, + "learning_rate": 4.735324530149521e-05, + "loss": 3.1598, + "step": 5362 + }, + { + "epoch": 0.86, + "learning_rate": 4.72423755003909e-05, + "loss": 3.1183, + "step": 5363 + }, + { + "epoch": 0.86, + "learning_rate": 4.713162920704656e-05, + "loss": 3.1192, + "step": 5364 + }, + { + "epoch": 0.87, + "learning_rate": 4.702100645167251e-05, + "loss": 2.943, + "step": 5365 + }, + { + "epoch": 0.87, + "learning_rate": 4.691050726444562e-05, + "loss": 3.2065, + "step": 5366 + }, + { + "epoch": 0.87, + "learning_rate": 4.6800131675509e-05, + "loss": 3.0614, + "step": 5367 + }, + { + "epoch": 0.87, + "learning_rate": 4.668987971497185e-05, + "loss": 3.0328, + "step": 5368 + }, + { + "epoch": 0.87, + "learning_rate": 4.657975141290993e-05, + "loss": 3.2564, + "step": 5369 + }, + { + "epoch": 0.87, + "learning_rate": 4.646974679936527e-05, + "loss": 3.022, + "step": 5370 + }, + { + "epoch": 0.87, + "learning_rate": 4.6359865904345765e-05, + "loss": 3.086, + "step": 5371 + }, + { + "epoch": 0.87, + "learning_rate": 4.625010875782598e-05, + "loss": 3.2474, + "step": 5372 + }, + { + "epoch": 0.87, + "learning_rate": 4.614047538974664e-05, + "loss": 3.3224, + "step": 5373 + }, + { + "epoch": 0.87, + "learning_rate": 4.603096583001432e-05, + "loss": 3.2119, + "step": 5374 + }, + { + "epoch": 0.87, + "learning_rate": 4.592158010850245e-05, + "loss": 3.1726, + "step": 5375 + }, + { + "epoch": 0.87, + "learning_rate": 4.581231825505033e-05, + "loss": 3.1038, + "step": 5376 + }, + { + "epoch": 0.87, + "learning_rate": 4.570318029946341e-05, + "loss": 3.1587, + "step": 5377 + }, + { + "epoch": 0.87, + "learning_rate": 4.559416627151336e-05, + "loss": 3.1657, + "step": 5378 + }, + { + "epoch": 0.87, + "learning_rate": 4.548527620093828e-05, + "loss": 3.1365, + "step": 5379 + }, + { + "epoch": 0.87, + "learning_rate": 4.5376510117442205e-05, + "loss": 3.1016, + "step": 5380 + }, + { + "epoch": 0.87, + "learning_rate": 4.526786805069549e-05, + "loss": 3.1437, + "step": 5381 + }, + { + "epoch": 0.87, + "learning_rate": 4.5159350030334665e-05, + "loss": 3.38, + "step": 5382 + }, + { + "epoch": 0.87, + "learning_rate": 4.505095608596216e-05, + "loss": 3.0378, + "step": 5383 + }, + { + "epoch": 0.87, + "learning_rate": 4.494268624714687e-05, + "loss": 3.1409, + "step": 5384 + }, + { + "epoch": 0.87, + "learning_rate": 4.483454054342373e-05, + "loss": 3.0254, + "step": 5385 + }, + { + "epoch": 0.87, + "learning_rate": 4.472651900429392e-05, + "loss": 3.1104, + "step": 5386 + }, + { + "epoch": 0.87, + "learning_rate": 4.461862165922437e-05, + "loss": 3.0587, + "step": 5387 + }, + { + "epoch": 0.87, + "learning_rate": 4.4510848537648694e-05, + "loss": 3.1152, + "step": 5388 + }, + { + "epoch": 0.87, + "learning_rate": 4.440319966896611e-05, + "loss": 3.1375, + "step": 5389 + }, + { + "epoch": 0.87, + "learning_rate": 4.429567508254223e-05, + "loss": 3.0635, + "step": 5390 + }, + { + "epoch": 0.87, + "learning_rate": 4.4188274807708705e-05, + "loss": 3.1113, + "step": 5391 + }, + { + "epoch": 0.87, + "learning_rate": 4.408099887376332e-05, + "loss": 3.1863, + "step": 5392 + }, + { + "epoch": 0.87, + "learning_rate": 4.397384730996962e-05, + "loss": 3.1055, + "step": 5393 + }, + { + "epoch": 0.87, + "learning_rate": 4.386682014555776e-05, + "loss": 3.2366, + "step": 5394 + }, + { + "epoch": 0.87, + "learning_rate": 4.375991740972368e-05, + "loss": 3.0813, + "step": 5395 + }, + { + "epoch": 0.87, + "learning_rate": 4.365313913162916e-05, + "loss": 3.1199, + "step": 5396 + }, + { + "epoch": 0.87, + "learning_rate": 4.3546485340402395e-05, + "loss": 3.0969, + "step": 5397 + }, + { + "epoch": 0.87, + "learning_rate": 4.343995606513751e-05, + "loss": 3.0795, + "step": 5398 + }, + { + "epoch": 0.87, + "learning_rate": 4.333355133489442e-05, + "loss": 3.1548, + "step": 5399 + }, + { + "epoch": 0.87, + "learning_rate": 4.322727117869951e-05, + "loss": 3.094, + "step": 5400 + }, + { + "epoch": 0.87, + "learning_rate": 4.3121115625544935e-05, + "loss": 3.0508, + "step": 5401 + }, + { + "epoch": 0.87, + "learning_rate": 4.301508470438869e-05, + "loss": 3.0741, + "step": 5402 + }, + { + "epoch": 0.87, + "learning_rate": 4.2909178444155094e-05, + "loss": 3.1428, + "step": 5403 + }, + { + "epoch": 0.87, + "learning_rate": 4.280339687373436e-05, + "loss": 3.2297, + "step": 5404 + }, + { + "epoch": 0.87, + "learning_rate": 4.269774002198235e-05, + "loss": 2.9579, + "step": 5405 + }, + { + "epoch": 0.87, + "learning_rate": 4.259220791772156e-05, + "loss": 3.1745, + "step": 5406 + }, + { + "epoch": 0.87, + "learning_rate": 4.248680058973997e-05, + "loss": 3.3474, + "step": 5407 + }, + { + "epoch": 0.87, + "learning_rate": 4.238151806679158e-05, + "loss": 3.1116, + "step": 5408 + }, + { + "epoch": 0.87, + "learning_rate": 4.227636037759641e-05, + "loss": 3.1529, + "step": 5409 + }, + { + "epoch": 0.87, + "learning_rate": 4.217132755084058e-05, + "loss": 3.1089, + "step": 5410 + }, + { + "epoch": 0.87, + "learning_rate": 4.2066419615175824e-05, + "loss": 3.1472, + "step": 5411 + }, + { + "epoch": 0.87, + "learning_rate": 4.196163659921992e-05, + "loss": 3.2151, + "step": 5412 + }, + { + "epoch": 0.87, + "learning_rate": 4.18569785315569e-05, + "loss": 3.1856, + "step": 5413 + }, + { + "epoch": 0.87, + "learning_rate": 4.1752445440736254e-05, + "loss": 3.0456, + "step": 5414 + }, + { + "epoch": 0.87, + "learning_rate": 4.164803735527356e-05, + "loss": 3.096, + "step": 5415 + }, + { + "epoch": 0.87, + "learning_rate": 4.1543754303650484e-05, + "loss": 3.3045, + "step": 5416 + }, + { + "epoch": 0.87, + "learning_rate": 4.143959631431415e-05, + "loss": 2.9486, + "step": 5417 + }, + { + "epoch": 0.87, + "learning_rate": 4.1335563415677844e-05, + "loss": 3.1236, + "step": 5418 + }, + { + "epoch": 0.87, + "learning_rate": 4.123165563612097e-05, + "loss": 3.3154, + "step": 5419 + }, + { + "epoch": 0.87, + "learning_rate": 4.1127873003988233e-05, + "loss": 3.054, + "step": 5420 + }, + { + "epoch": 0.87, + "learning_rate": 4.102421554759061e-05, + "loss": 3.0125, + "step": 5421 + }, + { + "epoch": 0.87, + "learning_rate": 4.092068329520493e-05, + "loss": 3.0724, + "step": 5422 + }, + { + "epoch": 0.87, + "learning_rate": 4.081727627507359e-05, + "loss": 3.0123, + "step": 5423 + }, + { + "epoch": 0.87, + "learning_rate": 4.071399451540497e-05, + "loss": 3.1354, + "step": 5424 + }, + { + "epoch": 0.87, + "learning_rate": 4.061083804437355e-05, + "loss": 3.0101, + "step": 5425 + }, + { + "epoch": 0.87, + "learning_rate": 4.050780689011912e-05, + "loss": 3.0255, + "step": 5426 + }, + { + "epoch": 0.88, + "learning_rate": 4.040490108074768e-05, + "loss": 3.2069, + "step": 5427 + }, + { + "epoch": 0.88, + "learning_rate": 4.0302120644330864e-05, + "loss": 3.0411, + "step": 5428 + }, + { + "epoch": 0.88, + "learning_rate": 4.019946560890625e-05, + "loss": 3.0705, + "step": 5429 + }, + { + "epoch": 0.88, + "learning_rate": 4.009693600247688e-05, + "loss": 3.2786, + "step": 5430 + }, + { + "epoch": 0.88, + "learning_rate": 3.999453185301211e-05, + "loss": 2.9021, + "step": 5431 + }, + { + "epoch": 0.88, + "learning_rate": 3.9892253188446524e-05, + "loss": 3.276, + "step": 5432 + }, + { + "epoch": 0.88, + "learning_rate": 3.979010003668082e-05, + "loss": 3.0801, + "step": 5433 + }, + { + "epoch": 0.88, + "learning_rate": 3.968807242558131e-05, + "loss": 3.3216, + "step": 5434 + }, + { + "epoch": 0.88, + "learning_rate": 3.958617038298029e-05, + "loss": 3.2306, + "step": 5435 + }, + { + "epoch": 0.88, + "learning_rate": 3.948439393667536e-05, + "loss": 3.1317, + "step": 5436 + }, + { + "epoch": 0.88, + "learning_rate": 3.938274311443018e-05, + "loss": 3.206, + "step": 5437 + }, + { + "epoch": 0.88, + "learning_rate": 3.9281217943974214e-05, + "loss": 3.1811, + "step": 5438 + }, + { + "epoch": 0.88, + "learning_rate": 3.917981845300239e-05, + "loss": 3.1741, + "step": 5439 + }, + { + "epoch": 0.88, + "learning_rate": 3.907854466917554e-05, + "loss": 3.0861, + "step": 5440 + }, + { + "epoch": 0.88, + "learning_rate": 3.897739662012017e-05, + "loss": 3.1293, + "step": 5441 + }, + { + "epoch": 0.88, + "learning_rate": 3.8876374333428335e-05, + "loss": 3.0845, + "step": 5442 + }, + { + "epoch": 0.88, + "learning_rate": 3.8775477836658026e-05, + "loss": 3.1251, + "step": 5443 + }, + { + "epoch": 0.88, + "learning_rate": 3.867470715733268e-05, + "loss": 3.1721, + "step": 5444 + }, + { + "epoch": 0.88, + "learning_rate": 3.857406232294164e-05, + "loss": 3.0012, + "step": 5445 + }, + { + "epoch": 0.88, + "learning_rate": 3.8473543360939775e-05, + "loss": 3.2365, + "step": 5446 + }, + { + "epoch": 0.88, + "learning_rate": 3.837315029874777e-05, + "loss": 3.2745, + "step": 5447 + }, + { + "epoch": 0.88, + "learning_rate": 3.8272883163751605e-05, + "loss": 3.1103, + "step": 5448 + }, + { + "epoch": 0.88, + "learning_rate": 3.817274198330323e-05, + "loss": 3.3087, + "step": 5449 + }, + { + "epoch": 0.88, + "learning_rate": 3.80727267847204e-05, + "loss": 3.1715, + "step": 5450 + }, + { + "epoch": 0.88, + "learning_rate": 3.797283759528597e-05, + "loss": 3.1065, + "step": 5451 + }, + { + "epoch": 0.88, + "learning_rate": 3.78730744422488e-05, + "loss": 3.1389, + "step": 5452 + }, + { + "epoch": 0.88, + "learning_rate": 3.777343735282346e-05, + "loss": 3.1071, + "step": 5453 + }, + { + "epoch": 0.88, + "learning_rate": 3.767392635418965e-05, + "loss": 3.0999, + "step": 5454 + }, + { + "epoch": 0.88, + "learning_rate": 3.757454147349304e-05, + "loss": 3.1647, + "step": 5455 + }, + { + "epoch": 0.88, + "learning_rate": 3.7475282737845116e-05, + "loss": 2.9632, + "step": 5456 + }, + { + "epoch": 0.88, + "learning_rate": 3.737615017432239e-05, + "loss": 3.104, + "step": 5457 + }, + { + "epoch": 0.88, + "learning_rate": 3.7277143809967274e-05, + "loss": 3.2383, + "step": 5458 + }, + { + "epoch": 0.88, + "learning_rate": 3.717826367178789e-05, + "loss": 2.9692, + "step": 5459 + }, + { + "epoch": 0.88, + "learning_rate": 3.70795097867575e-05, + "loss": 3.0111, + "step": 5460 + }, + { + "epoch": 0.88, + "learning_rate": 3.698088218181533e-05, + "loss": 3.0743, + "step": 5461 + }, + { + "epoch": 0.88, + "learning_rate": 3.688238088386592e-05, + "loss": 3.0761, + "step": 5462 + }, + { + "epoch": 0.88, + "learning_rate": 3.678400591977954e-05, + "loss": 3.1577, + "step": 5463 + }, + { + "epoch": 0.88, + "learning_rate": 3.668575731639184e-05, + "loss": 2.9539, + "step": 5464 + }, + { + "epoch": 0.88, + "learning_rate": 3.658763510050417e-05, + "loss": 3.1983, + "step": 5465 + }, + { + "epoch": 0.88, + "learning_rate": 3.64896392988831e-05, + "loss": 3.0652, + "step": 5466 + }, + { + "epoch": 0.88, + "learning_rate": 3.6391769938261047e-05, + "loss": 3.1895, + "step": 5467 + }, + { + "epoch": 0.88, + "learning_rate": 3.6294027045335686e-05, + "loss": 3.083, + "step": 5468 + }, + { + "epoch": 0.88, + "learning_rate": 3.619641064677037e-05, + "loss": 3.1826, + "step": 5469 + }, + { + "epoch": 0.88, + "learning_rate": 3.609892076919391e-05, + "loss": 3.0434, + "step": 5470 + }, + { + "epoch": 0.88, + "learning_rate": 3.600155743920058e-05, + "loss": 3.2173, + "step": 5471 + }, + { + "epoch": 0.88, + "learning_rate": 3.590432068334998e-05, + "loss": 3.163, + "step": 5472 + }, + { + "epoch": 0.88, + "learning_rate": 3.5807210528167434e-05, + "loss": 3.1514, + "step": 5473 + }, + { + "epoch": 0.88, + "learning_rate": 3.5710227000143646e-05, + "loss": 3.205, + "step": 5474 + }, + { + "epoch": 0.88, + "learning_rate": 3.5613370125734734e-05, + "loss": 3.2162, + "step": 5475 + }, + { + "epoch": 0.88, + "learning_rate": 3.5516639931362224e-05, + "loss": 3.1297, + "step": 5476 + }, + { + "epoch": 0.88, + "learning_rate": 3.542003644341329e-05, + "loss": 3.2072, + "step": 5477 + }, + { + "epoch": 0.88, + "learning_rate": 3.5323559688240246e-05, + "loss": 3.1577, + "step": 5478 + }, + { + "epoch": 0.88, + "learning_rate": 3.5227209692161035e-05, + "loss": 3.262, + "step": 5479 + }, + { + "epoch": 0.88, + "learning_rate": 3.5130986481458956e-05, + "loss": 3.1636, + "step": 5480 + }, + { + "epoch": 0.88, + "learning_rate": 3.503489008238281e-05, + "loss": 3.2934, + "step": 5481 + }, + { + "epoch": 0.88, + "learning_rate": 3.493892052114666e-05, + "loss": 3.1579, + "step": 5482 + }, + { + "epoch": 0.88, + "learning_rate": 3.4843077823930146e-05, + "loss": 3.2004, + "step": 5483 + }, + { + "epoch": 0.88, + "learning_rate": 3.474736201687817e-05, + "loss": 3.1963, + "step": 5484 + }, + { + "epoch": 0.88, + "learning_rate": 3.4651773126100904e-05, + "loss": 3.0756, + "step": 5485 + }, + { + "epoch": 0.88, + "learning_rate": 3.455631117767422e-05, + "loss": 3.0818, + "step": 5486 + }, + { + "epoch": 0.88, + "learning_rate": 3.4460976197639047e-05, + "loss": 3.0706, + "step": 5487 + }, + { + "epoch": 0.88, + "learning_rate": 3.436576821200193e-05, + "loss": 3.1925, + "step": 5488 + }, + { + "epoch": 0.89, + "learning_rate": 3.42706872467346e-05, + "loss": 3.2538, + "step": 5489 + }, + { + "epoch": 0.89, + "learning_rate": 3.4175733327774296e-05, + "loss": 3.091, + "step": 5490 + }, + { + "epoch": 0.89, + "learning_rate": 3.40809064810233e-05, + "loss": 2.994, + "step": 5491 + }, + { + "epoch": 0.89, + "learning_rate": 3.398620673234953e-05, + "loss": 3.0751, + "step": 5492 + }, + { + "epoch": 0.89, + "learning_rate": 3.389163410758622e-05, + "loss": 3.1426, + "step": 5493 + }, + { + "epoch": 0.89, + "learning_rate": 3.3797188632531675e-05, + "loss": 2.9936, + "step": 5494 + }, + { + "epoch": 0.89, + "learning_rate": 3.3702870332949776e-05, + "loss": 3.279, + "step": 5495 + }, + { + "epoch": 0.89, + "learning_rate": 3.360867923456973e-05, + "loss": 3.1055, + "step": 5496 + }, + { + "epoch": 0.89, + "learning_rate": 3.351461536308564e-05, + "loss": 3.1975, + "step": 5497 + }, + { + "epoch": 0.89, + "learning_rate": 3.3420678744157384e-05, + "loss": 3.173, + "step": 5498 + }, + { + "epoch": 0.89, + "learning_rate": 3.33268694034099e-05, + "loss": 2.9665, + "step": 5499 + }, + { + "epoch": 0.89, + "learning_rate": 3.3233187366433436e-05, + "loss": 3.1158, + "step": 5500 + }, + { + "epoch": 0.89, + "learning_rate": 3.313963265878356e-05, + "loss": 2.9923, + "step": 5501 + }, + { + "epoch": 0.89, + "learning_rate": 3.3046205305981066e-05, + "loss": 3.1909, + "step": 5502 + }, + { + "epoch": 0.89, + "learning_rate": 3.2952905333511865e-05, + "loss": 3.0754, + "step": 5503 + }, + { + "epoch": 0.89, + "learning_rate": 3.285973276682736e-05, + "loss": 3.1634, + "step": 5504 + }, + { + "epoch": 0.89, + "learning_rate": 3.276668763134405e-05, + "loss": 3.1021, + "step": 5505 + }, + { + "epoch": 0.89, + "learning_rate": 3.267376995244381e-05, + "loss": 3.1038, + "step": 5506 + }, + { + "epoch": 0.89, + "learning_rate": 3.2580979755473586e-05, + "loss": 3.0182, + "step": 5507 + }, + { + "epoch": 0.89, + "learning_rate": 3.248831706574568e-05, + "loss": 3.2036, + "step": 5508 + }, + { + "epoch": 0.89, + "learning_rate": 3.239578190853748e-05, + "loss": 3.0976, + "step": 5509 + }, + { + "epoch": 0.89, + "learning_rate": 3.2303374309091635e-05, + "loss": 3.1029, + "step": 5510 + }, + { + "epoch": 0.89, + "learning_rate": 3.221109429261615e-05, + "loss": 3.0883, + "step": 5511 + }, + { + "epoch": 0.89, + "learning_rate": 3.2118941884283824e-05, + "loss": 3.1463, + "step": 5512 + }, + { + "epoch": 0.89, + "learning_rate": 3.202691710923317e-05, + "loss": 2.9521, + "step": 5513 + }, + { + "epoch": 0.89, + "learning_rate": 3.193501999256765e-05, + "loss": 3.132, + "step": 5514 + }, + { + "epoch": 0.89, + "learning_rate": 3.1843250559355666e-05, + "loss": 3.1626, + "step": 5515 + }, + { + "epoch": 0.89, + "learning_rate": 3.175160883463113e-05, + "loss": 3.1504, + "step": 5516 + }, + { + "epoch": 0.89, + "learning_rate": 3.166009484339294e-05, + "loss": 3.0446, + "step": 5517 + }, + { + "epoch": 0.89, + "learning_rate": 3.156870861060529e-05, + "loss": 3.214, + "step": 5518 + }, + { + "epoch": 0.89, + "learning_rate": 3.1477450161197297e-05, + "loss": 3.0483, + "step": 5519 + }, + { + "epoch": 0.89, + "learning_rate": 3.13863195200636e-05, + "loss": 3.159, + "step": 5520 + }, + { + "epoch": 0.89, + "learning_rate": 3.1295316712063426e-05, + "loss": 2.9099, + "step": 5521 + }, + { + "epoch": 0.89, + "learning_rate": 3.120444176202153e-05, + "loss": 2.9941, + "step": 5522 + }, + { + "epoch": 0.89, + "learning_rate": 3.11136946947278e-05, + "loss": 3.1232, + "step": 5523 + }, + { + "epoch": 0.89, + "learning_rate": 3.102307553493699e-05, + "loss": 3.1427, + "step": 5524 + }, + { + "epoch": 0.89, + "learning_rate": 3.093258430736923e-05, + "loss": 3.1802, + "step": 5525 + }, + { + "epoch": 0.89, + "learning_rate": 3.084222103670964e-05, + "loss": 3.2014, + "step": 5526 + }, + { + "epoch": 0.89, + "learning_rate": 3.075198574760823e-05, + "loss": 3.175, + "step": 5527 + }, + { + "epoch": 0.89, + "learning_rate": 3.066187846468038e-05, + "loss": 3.1835, + "step": 5528 + }, + { + "epoch": 0.89, + "learning_rate": 3.057189921250653e-05, + "loss": 3.2351, + "step": 5529 + }, + { + "epoch": 0.89, + "learning_rate": 3.0482048015632036e-05, + "loss": 3.2159, + "step": 5530 + }, + { + "epoch": 0.89, + "learning_rate": 3.0392324898567414e-05, + "loss": 3.1304, + "step": 5531 + }, + { + "epoch": 0.89, + "learning_rate": 3.030272988578825e-05, + "loss": 3.0428, + "step": 5532 + }, + { + "epoch": 0.89, + "learning_rate": 3.0213263001735326e-05, + "loss": 3.0456, + "step": 5533 + }, + { + "epoch": 0.89, + "learning_rate": 3.0123924270814008e-05, + "loss": 2.9054, + "step": 5534 + }, + { + "epoch": 0.89, + "learning_rate": 3.003471371739519e-05, + "loss": 3.1082, + "step": 5535 + }, + { + "epoch": 0.89, + "learning_rate": 2.994563136581474e-05, + "loss": 3.1387, + "step": 5536 + }, + { + "epoch": 0.89, + "learning_rate": 2.9856677240373055e-05, + "loss": 3.2325, + "step": 5537 + }, + { + "epoch": 0.89, + "learning_rate": 2.9767851365336273e-05, + "loss": 3.1436, + "step": 5538 + }, + { + "epoch": 0.89, + "learning_rate": 2.9679153764935184e-05, + "loss": 3.1744, + "step": 5539 + }, + { + "epoch": 0.89, + "learning_rate": 2.9590584463365434e-05, + "loss": 3.107, + "step": 5540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9502143484787968e-05, + "loss": 3.083, + "step": 5541 + }, + { + "epoch": 0.89, + "learning_rate": 2.9413830853328606e-05, + "loss": 3.1019, + "step": 5542 + }, + { + "epoch": 0.89, + "learning_rate": 2.9325646593078014e-05, + "loss": 3.1257, + "step": 5543 + }, + { + "epoch": 0.89, + "learning_rate": 2.9237590728092122e-05, + "loss": 3.1544, + "step": 5544 + }, + { + "epoch": 0.89, + "learning_rate": 2.9149663282391713e-05, + "loss": 3.0667, + "step": 5545 + }, + { + "epoch": 0.89, + "learning_rate": 2.906186427996238e-05, + "loss": 3.2552, + "step": 5546 + }, + { + "epoch": 0.89, + "learning_rate": 2.8974193744754907e-05, + "loss": 3.1815, + "step": 5547 + }, + { + "epoch": 0.89, + "learning_rate": 2.8886651700684995e-05, + "loss": 3.252, + "step": 5548 + }, + { + "epoch": 0.89, + "learning_rate": 2.8799238171633047e-05, + "loss": 3.1829, + "step": 5549 + }, + { + "epoch": 0.89, + "learning_rate": 2.8711953181444707e-05, + "loss": 3.1914, + "step": 5550 + }, + { + "epoch": 0.9, + "learning_rate": 2.8624796753930592e-05, + "loss": 3.0172, + "step": 5551 + }, + { + "epoch": 0.9, + "learning_rate": 2.8537768912865912e-05, + "loss": 3.048, + "step": 5552 + }, + { + "epoch": 0.9, + "learning_rate": 2.845086968199101e-05, + "loss": 3.0889, + "step": 5553 + }, + { + "epoch": 0.9, + "learning_rate": 2.836409908501131e-05, + "loss": 3.1821, + "step": 5554 + }, + { + "epoch": 0.9, + "learning_rate": 2.8277457145596653e-05, + "loss": 3.1015, + "step": 5555 + }, + { + "epoch": 0.9, + "learning_rate": 2.8190943887382303e-05, + "loss": 2.9738, + "step": 5556 + }, + { + "epoch": 0.9, + "learning_rate": 2.8104559333968326e-05, + "loss": 3.0788, + "step": 5557 + }, + { + "epoch": 0.9, + "learning_rate": 2.8018303508919262e-05, + "loss": 3.1068, + "step": 5558 + }, + { + "epoch": 0.9, + "learning_rate": 2.7932176435765066e-05, + "loss": 3.239, + "step": 5559 + }, + { + "epoch": 0.9, + "learning_rate": 2.7846178138000333e-05, + "loss": 3.0694, + "step": 5560 + }, + { + "epoch": 0.9, + "learning_rate": 2.7760308639084408e-05, + "loss": 3.0002, + "step": 5561 + }, + { + "epoch": 0.9, + "learning_rate": 2.767456796244161e-05, + "loss": 3.1365, + "step": 5562 + }, + { + "epoch": 0.9, + "learning_rate": 2.7588956131461396e-05, + "loss": 3.1457, + "step": 5563 + }, + { + "epoch": 0.9, + "learning_rate": 2.750347316949764e-05, + "loss": 3.1996, + "step": 5564 + }, + { + "epoch": 0.9, + "learning_rate": 2.741811909986919e-05, + "loss": 3.0402, + "step": 5565 + }, + { + "epoch": 0.9, + "learning_rate": 2.733289394585986e-05, + "loss": 3.0723, + "step": 5566 + }, + { + "epoch": 0.9, + "learning_rate": 2.7247797730718338e-05, + "loss": 3.1055, + "step": 5567 + }, + { + "epoch": 0.9, + "learning_rate": 2.7162830477657717e-05, + "loss": 3.2497, + "step": 5568 + }, + { + "epoch": 0.9, + "learning_rate": 2.7077992209856518e-05, + "loss": 3.1498, + "step": 5569 + }, + { + "epoch": 0.9, + "learning_rate": 2.6993282950457664e-05, + "loss": 3.2538, + "step": 5570 + }, + { + "epoch": 0.9, + "learning_rate": 2.69087027225689e-05, + "loss": 3.1881, + "step": 5571 + }, + { + "epoch": 0.9, + "learning_rate": 2.682425154926299e-05, + "loss": 3.1852, + "step": 5572 + }, + { + "epoch": 0.9, + "learning_rate": 2.6739929453577396e-05, + "loss": 2.9833, + "step": 5573 + }, + { + "epoch": 0.9, + "learning_rate": 2.665573645851416e-05, + "loss": 2.9415, + "step": 5574 + }, + { + "epoch": 0.9, + "learning_rate": 2.657167258704052e-05, + "loss": 3.0489, + "step": 5575 + }, + { + "epoch": 0.9, + "learning_rate": 2.6487737862088136e-05, + "loss": 3.0258, + "step": 5576 + }, + { + "epoch": 0.9, + "learning_rate": 2.6403932306553523e-05, + "loss": 3.1672, + "step": 5577 + }, + { + "epoch": 0.9, + "learning_rate": 2.6320255943298065e-05, + "loss": 3.0145, + "step": 5578 + }, + { + "epoch": 0.9, + "learning_rate": 2.6236708795147945e-05, + "loss": 3.1491, + "step": 5579 + }, + { + "epoch": 0.9, + "learning_rate": 2.615329088489371e-05, + "loss": 3.1024, + "step": 5580 + }, + { + "epoch": 0.9, + "learning_rate": 2.6070002235291212e-05, + "loss": 3.2109, + "step": 5581 + }, + { + "epoch": 0.9, + "learning_rate": 2.5986842869060613e-05, + "loss": 3.0365, + "step": 5582 + }, + { + "epoch": 0.9, + "learning_rate": 2.5903812808886985e-05, + "loss": 3.0836, + "step": 5583 + }, + { + "epoch": 0.9, + "learning_rate": 2.58209120774201e-05, + "loss": 3.0877, + "step": 5584 + }, + { + "epoch": 0.9, + "learning_rate": 2.5738140697274537e-05, + "loss": 3.2719, + "step": 5585 + }, + { + "epoch": 0.9, + "learning_rate": 2.5655498691029343e-05, + "loss": 3.1799, + "step": 5586 + }, + { + "epoch": 0.9, + "learning_rate": 2.557298608122849e-05, + "loss": 3.0924, + "step": 5587 + }, + { + "epoch": 0.9, + "learning_rate": 2.5490602890380687e-05, + "loss": 3.1799, + "step": 5588 + }, + { + "epoch": 0.9, + "learning_rate": 2.540834914095913e-05, + "loss": 3.1245, + "step": 5589 + }, + { + "epoch": 0.9, + "learning_rate": 2.5326224855401925e-05, + "loss": 3.1834, + "step": 5590 + }, + { + "epoch": 0.9, + "learning_rate": 2.5244230056111704e-05, + "loss": 3.2579, + "step": 5591 + }, + { + "epoch": 0.9, + "learning_rate": 2.5162364765455804e-05, + "loss": 3.1161, + "step": 5592 + }, + { + "epoch": 0.9, + "learning_rate": 2.5080629005766188e-05, + "loss": 3.1203, + "step": 5593 + }, + { + "epoch": 0.9, + "learning_rate": 2.4999022799339798e-05, + "loss": 3.1735, + "step": 5594 + }, + { + "epoch": 0.9, + "learning_rate": 2.4917546168437722e-05, + "loss": 3.2784, + "step": 5595 + }, + { + "epoch": 0.9, + "learning_rate": 2.483619913528612e-05, + "loss": 3.085, + "step": 5596 + }, + { + "epoch": 0.9, + "learning_rate": 2.4754981722075744e-05, + "loss": 3.017, + "step": 5597 + }, + { + "epoch": 0.9, + "learning_rate": 2.4673893950961645e-05, + "loss": 3.2078, + "step": 5598 + }, + { + "epoch": 0.9, + "learning_rate": 2.4592935844063858e-05, + "loss": 3.1349, + "step": 5599 + }, + { + "epoch": 0.9, + "learning_rate": 2.4512107423467045e-05, + "loss": 3.0338, + "step": 5600 + }, + { + "epoch": 0.9, + "learning_rate": 2.4431408711220294e-05, + "loss": 3.0361, + "step": 5601 + }, + { + "epoch": 0.9, + "learning_rate": 2.435083972933744e-05, + "loss": 3.268, + "step": 5602 + }, + { + "epoch": 0.9, + "learning_rate": 2.427040049979695e-05, + "loss": 3.0204, + "step": 5603 + }, + { + "epoch": 0.9, + "learning_rate": 2.419009104454173e-05, + "loss": 3.1422, + "step": 5604 + }, + { + "epoch": 0.9, + "learning_rate": 2.4109911385479356e-05, + "loss": 3.0914, + "step": 5605 + }, + { + "epoch": 0.9, + "learning_rate": 2.4029861544482277e-05, + "loss": 3.0345, + "step": 5606 + }, + { + "epoch": 0.9, + "learning_rate": 2.3949941543387087e-05, + "loss": 3.2027, + "step": 5607 + }, + { + "epoch": 0.9, + "learning_rate": 2.3870151403995234e-05, + "loss": 3.1112, + "step": 5608 + }, + { + "epoch": 0.9, + "learning_rate": 2.379049114807269e-05, + "loss": 3.1418, + "step": 5609 + }, + { + "epoch": 0.9, + "learning_rate": 2.3710960797349912e-05, + "loss": 3.0829, + "step": 5610 + }, + { + "epoch": 0.9, + "learning_rate": 2.3631560373522044e-05, + "loss": 3.1345, + "step": 5611 + }, + { + "epoch": 0.9, + "learning_rate": 2.35522898982487e-05, + "loss": 3.2449, + "step": 5612 + }, + { + "epoch": 0.91, + "learning_rate": 2.3473149393154037e-05, + "loss": 3.1377, + "step": 5613 + }, + { + "epoch": 0.91, + "learning_rate": 2.3394138879826886e-05, + "loss": 3.2888, + "step": 5614 + }, + { + "epoch": 0.91, + "learning_rate": 2.3315258379820516e-05, + "loss": 3.2535, + "step": 5615 + }, + { + "epoch": 0.91, + "learning_rate": 2.323650791465265e-05, + "loss": 3.1639, + "step": 5616 + }, + { + "epoch": 0.91, + "learning_rate": 2.315788750580572e-05, + "loss": 3.1812, + "step": 5617 + }, + { + "epoch": 0.91, + "learning_rate": 2.3079397174726512e-05, + "loss": 3.033, + "step": 5618 + }, + { + "epoch": 0.91, + "learning_rate": 2.300103694282646e-05, + "loss": 3.105, + "step": 5619 + }, + { + "epoch": 0.91, + "learning_rate": 2.2922806831481402e-05, + "loss": 3.1516, + "step": 5620 + }, + { + "epoch": 0.91, + "learning_rate": 2.284470686203177e-05, + "loss": 3.1781, + "step": 5621 + }, + { + "epoch": 0.91, + "learning_rate": 2.2766737055782583e-05, + "loss": 3.1672, + "step": 5622 + }, + { + "epoch": 0.91, + "learning_rate": 2.2688897434002986e-05, + "loss": 3.4466, + "step": 5623 + }, + { + "epoch": 0.91, + "learning_rate": 2.2611188017926943e-05, + "loss": 3.1497, + "step": 5624 + }, + { + "epoch": 0.91, + "learning_rate": 2.2533608828752827e-05, + "loss": 3.1041, + "step": 5625 + }, + { + "epoch": 0.91, + "learning_rate": 2.245615988764349e-05, + "loss": 3.1792, + "step": 5626 + }, + { + "epoch": 0.91, + "learning_rate": 2.23788412157262e-05, + "loss": 3.1931, + "step": 5627 + }, + { + "epoch": 0.91, + "learning_rate": 2.23016528340928e-05, + "loss": 2.9503, + "step": 5628 + }, + { + "epoch": 0.91, + "learning_rate": 2.2224594763799344e-05, + "loss": 3.2422, + "step": 5629 + }, + { + "epoch": 0.91, + "learning_rate": 2.2147667025866568e-05, + "loss": 3.1883, + "step": 5630 + }, + { + "epoch": 0.91, + "learning_rate": 2.2070869641279633e-05, + "loss": 3.2113, + "step": 5631 + }, + { + "epoch": 0.91, + "learning_rate": 2.1994202630988113e-05, + "loss": 3.175, + "step": 5632 + }, + { + "epoch": 0.91, + "learning_rate": 2.1917666015905946e-05, + "loss": 3.1057, + "step": 5633 + }, + { + "epoch": 0.91, + "learning_rate": 2.184125981691165e-05, + "loss": 3.0862, + "step": 5634 + }, + { + "epoch": 0.91, + "learning_rate": 2.1764984054847947e-05, + "loss": 3.2033, + "step": 5635 + }, + { + "epoch": 0.91, + "learning_rate": 2.1688838750522134e-05, + "loss": 3.148, + "step": 5636 + }, + { + "epoch": 0.91, + "learning_rate": 2.1612823924705927e-05, + "loss": 3.1849, + "step": 5637 + }, + { + "epoch": 0.91, + "learning_rate": 2.1536939598135406e-05, + "loss": 3.1379, + "step": 5638 + }, + { + "epoch": 0.91, + "learning_rate": 2.1461185791511072e-05, + "loss": 3.1795, + "step": 5639 + }, + { + "epoch": 0.91, + "learning_rate": 2.1385562525497838e-05, + "loss": 3.0553, + "step": 5640 + }, + { + "epoch": 0.91, + "learning_rate": 2.1310069820724866e-05, + "loss": 3.1972, + "step": 5641 + }, + { + "epoch": 0.91, + "learning_rate": 2.1234707697785848e-05, + "loss": 3.1765, + "step": 5642 + }, + { + "epoch": 0.91, + "learning_rate": 2.1159476177238846e-05, + "loss": 3.062, + "step": 5643 + }, + { + "epoch": 0.91, + "learning_rate": 2.1084375279606273e-05, + "loss": 3.172, + "step": 5644 + }, + { + "epoch": 0.91, + "learning_rate": 2.1009405025374904e-05, + "loss": 3.1337, + "step": 5645 + }, + { + "epoch": 0.91, + "learning_rate": 2.0934565434995944e-05, + "loss": 3.0838, + "step": 5646 + }, + { + "epoch": 0.91, + "learning_rate": 2.0859856528884726e-05, + "loss": 3.0862, + "step": 5647 + }, + { + "epoch": 0.91, + "learning_rate": 2.0785278327421218e-05, + "loss": 3.1934, + "step": 5648 + }, + { + "epoch": 0.91, + "learning_rate": 2.0710830850949547e-05, + "loss": 3.1889, + "step": 5649 + }, + { + "epoch": 0.91, + "learning_rate": 2.0636514119778238e-05, + "loss": 3.1315, + "step": 5650 + }, + { + "epoch": 0.91, + "learning_rate": 2.0562328154180188e-05, + "loss": 3.3087, + "step": 5651 + }, + { + "epoch": 0.91, + "learning_rate": 2.0488272974392654e-05, + "loss": 3.1141, + "step": 5652 + }, + { + "epoch": 0.91, + "learning_rate": 2.041434860061697e-05, + "loss": 3.1611, + "step": 5653 + }, + { + "epoch": 0.91, + "learning_rate": 2.0340555053019126e-05, + "loss": 3.149, + "step": 5654 + }, + { + "epoch": 0.91, + "learning_rate": 2.0266892351729183e-05, + "loss": 3.0845, + "step": 5655 + }, + { + "epoch": 0.91, + "learning_rate": 2.0193360516841618e-05, + "loss": 3.1973, + "step": 5656 + }, + { + "epoch": 0.91, + "learning_rate": 2.011995956841517e-05, + "loss": 2.975, + "step": 5657 + }, + { + "epoch": 0.91, + "learning_rate": 2.004668952647298e-05, + "loss": 3.0875, + "step": 5658 + }, + { + "epoch": 0.91, + "learning_rate": 1.997355041100224e-05, + "loss": 2.9051, + "step": 5659 + }, + { + "epoch": 0.91, + "learning_rate": 1.9900542241954645e-05, + "loss": 3.085, + "step": 5660 + }, + { + "epoch": 0.91, + "learning_rate": 1.9827665039246046e-05, + "loss": 3.2426, + "step": 5661 + }, + { + "epoch": 0.91, + "learning_rate": 1.975491882275665e-05, + "loss": 3.024, + "step": 5662 + }, + { + "epoch": 0.91, + "learning_rate": 1.968230361233092e-05, + "loss": 2.9406, + "step": 5663 + }, + { + "epoch": 0.91, + "learning_rate": 1.960981942777762e-05, + "loss": 3.163, + "step": 5664 + }, + { + "epoch": 0.91, + "learning_rate": 1.9537466288869542e-05, + "loss": 3.0246, + "step": 5665 + }, + { + "epoch": 0.91, + "learning_rate": 1.946524421534396e-05, + "loss": 3.1116, + "step": 5666 + }, + { + "epoch": 0.91, + "learning_rate": 1.9393153226902384e-05, + "loss": 3.1128, + "step": 5667 + }, + { + "epoch": 0.91, + "learning_rate": 1.932119334321053e-05, + "loss": 3.1036, + "step": 5668 + }, + { + "epoch": 0.91, + "learning_rate": 1.9249364583898253e-05, + "loss": 3.1381, + "step": 5669 + }, + { + "epoch": 0.91, + "learning_rate": 1.9177666968559825e-05, + "loss": 3.0413, + "step": 5670 + }, + { + "epoch": 0.91, + "learning_rate": 1.9106100516753598e-05, + "loss": 3.0466, + "step": 5671 + }, + { + "epoch": 0.91, + "learning_rate": 1.9034665248002126e-05, + "loss": 3.0609, + "step": 5672 + }, + { + "epoch": 0.91, + "learning_rate": 1.896336118179226e-05, + "loss": 3.125, + "step": 5673 + }, + { + "epoch": 0.91, + "learning_rate": 1.889218833757511e-05, + "loss": 3.2234, + "step": 5674 + }, + { + "epoch": 0.92, + "learning_rate": 1.882114673476587e-05, + "loss": 3.1876, + "step": 5675 + }, + { + "epoch": 0.92, + "learning_rate": 1.8750236392743923e-05, + "loss": 3.209, + "step": 5676 + }, + { + "epoch": 0.92, + "learning_rate": 1.8679457330853077e-05, + "loss": 3.1199, + "step": 5677 + }, + { + "epoch": 0.92, + "learning_rate": 1.8608809568400943e-05, + "loss": 3.1794, + "step": 5678 + }, + { + "epoch": 0.92, + "learning_rate": 1.85382931246596e-05, + "loss": 3.0941, + "step": 5679 + }, + { + "epoch": 0.92, + "learning_rate": 1.8467908018865275e-05, + "loss": 3.0452, + "step": 5680 + }, + { + "epoch": 0.92, + "learning_rate": 1.8397654270218278e-05, + "loss": 3.1724, + "step": 5681 + }, + { + "epoch": 0.92, + "learning_rate": 1.8327531897883166e-05, + "loss": 3.0051, + "step": 5682 + }, + { + "epoch": 0.92, + "learning_rate": 1.8257540920988636e-05, + "loss": 3.154, + "step": 5683 + }, + { + "epoch": 0.92, + "learning_rate": 1.8187681358627418e-05, + "loss": 3.1737, + "step": 5684 + }, + { + "epoch": 0.92, + "learning_rate": 1.81179532298566e-05, + "loss": 3.207, + "step": 5685 + }, + { + "epoch": 0.92, + "learning_rate": 1.804835655369741e-05, + "loss": 3.2448, + "step": 5686 + }, + { + "epoch": 0.92, + "learning_rate": 1.7978891349134828e-05, + "loss": 3.1649, + "step": 5687 + }, + { + "epoch": 0.92, + "learning_rate": 1.7909557635118536e-05, + "loss": 3.2022, + "step": 5688 + }, + { + "epoch": 0.92, + "learning_rate": 1.784035543056206e-05, + "loss": 3.1362, + "step": 5689 + }, + { + "epoch": 0.92, + "learning_rate": 1.777128475434292e-05, + "loss": 3.2726, + "step": 5690 + }, + { + "epoch": 0.92, + "learning_rate": 1.770234562530304e-05, + "loss": 3.0861, + "step": 5691 + }, + { + "epoch": 0.92, + "learning_rate": 1.7633538062248323e-05, + "loss": 3.2239, + "step": 5692 + }, + { + "epoch": 0.92, + "learning_rate": 1.756486208394864e-05, + "loss": 3.0944, + "step": 5693 + }, + { + "epoch": 0.92, + "learning_rate": 1.7496317709138234e-05, + "loss": 3.1261, + "step": 5694 + }, + { + "epoch": 0.92, + "learning_rate": 1.7427904956515416e-05, + "loss": 3.0781, + "step": 5695 + }, + { + "epoch": 0.92, + "learning_rate": 1.735962384474232e-05, + "loss": 3.125, + "step": 5696 + }, + { + "epoch": 0.92, + "learning_rate": 1.729147439244538e-05, + "loss": 3.269, + "step": 5697 + }, + { + "epoch": 0.92, + "learning_rate": 1.7223456618215228e-05, + "loss": 3.1959, + "step": 5698 + }, + { + "epoch": 0.92, + "learning_rate": 1.7155570540606236e-05, + "loss": 3.2378, + "step": 5699 + }, + { + "epoch": 0.92, + "learning_rate": 1.7087816178137205e-05, + "loss": 3.1817, + "step": 5700 + }, + { + "epoch": 0.92, + "learning_rate": 1.7020193549290852e-05, + "loss": 2.9893, + "step": 5701 + }, + { + "epoch": 0.92, + "learning_rate": 1.695270267251381e-05, + "loss": 2.9859, + "step": 5702 + }, + { + "epoch": 0.92, + "learning_rate": 1.6885343566217016e-05, + "loss": 3.1793, + "step": 5703 + }, + { + "epoch": 0.92, + "learning_rate": 1.681811624877533e-05, + "loss": 3.0596, + "step": 5704 + }, + { + "epoch": 0.92, + "learning_rate": 1.6751020738527746e-05, + "loss": 3.2552, + "step": 5705 + }, + { + "epoch": 0.92, + "learning_rate": 1.6684057053777235e-05, + "loss": 3.0322, + "step": 5706 + }, + { + "epoch": 0.92, + "learning_rate": 1.66172252127908e-05, + "loss": 3.1178, + "step": 5707 + }, + { + "epoch": 0.92, + "learning_rate": 1.6550525233799462e-05, + "loss": 3.3634, + "step": 5708 + }, + { + "epoch": 0.92, + "learning_rate": 1.6483957134998396e-05, + "loss": 3.1225, + "step": 5709 + }, + { + "epoch": 0.92, + "learning_rate": 1.6417520934546627e-05, + "loss": 3.2244, + "step": 5710 + }, + { + "epoch": 0.92, + "learning_rate": 1.635121665056738e-05, + "loss": 3.0655, + "step": 5711 + }, + { + "epoch": 0.92, + "learning_rate": 1.6285044301147634e-05, + "loss": 3.1764, + "step": 5712 + }, + { + "epoch": 0.92, + "learning_rate": 1.621900390433878e-05, + "loss": 3.1152, + "step": 5713 + }, + { + "epoch": 0.92, + "learning_rate": 1.61530954781558e-05, + "loss": 3.053, + "step": 5714 + }, + { + "epoch": 0.92, + "learning_rate": 1.608731904057792e-05, + "loss": 3.0433, + "step": 5715 + }, + { + "epoch": 0.92, + "learning_rate": 1.6021674609548285e-05, + "loss": 3.1546, + "step": 5716 + }, + { + "epoch": 0.92, + "learning_rate": 1.5956162202974133e-05, + "loss": 3.1169, + "step": 5717 + }, + { + "epoch": 0.92, + "learning_rate": 1.5890781838726388e-05, + "loss": 3.1892, + "step": 5718 + }, + { + "epoch": 0.92, + "learning_rate": 1.582553353464039e-05, + "loss": 3.2242, + "step": 5719 + }, + { + "epoch": 0.92, + "learning_rate": 1.576041730851502e-05, + "loss": 3.0256, + "step": 5720 + }, + { + "epoch": 0.92, + "learning_rate": 1.569543317811345e-05, + "loss": 3.1503, + "step": 5721 + }, + { + "epoch": 0.92, + "learning_rate": 1.5630581161162672e-05, + "loss": 3.1163, + "step": 5722 + }, + { + "epoch": 0.92, + "learning_rate": 1.5565861275353755e-05, + "loss": 3.1321, + "step": 5723 + }, + { + "epoch": 0.92, + "learning_rate": 1.5501273538341466e-05, + "loss": 3.0619, + "step": 5724 + }, + { + "epoch": 0.92, + "learning_rate": 1.543681796774482e-05, + "loss": 3.1392, + "step": 5725 + }, + { + "epoch": 0.92, + "learning_rate": 1.5372494581146702e-05, + "loss": 3.1779, + "step": 5726 + }, + { + "epoch": 0.92, + "learning_rate": 1.5308303396093682e-05, + "loss": 3.1552, + "step": 5727 + }, + { + "epoch": 0.92, + "learning_rate": 1.5244244430096699e-05, + "loss": 3.2945, + "step": 5728 + }, + { + "epoch": 0.92, + "learning_rate": 1.5180317700630274e-05, + "loss": 2.857, + "step": 5729 + }, + { + "epoch": 0.92, + "learning_rate": 1.5116523225132961e-05, + "loss": 3.1144, + "step": 5730 + }, + { + "epoch": 0.92, + "learning_rate": 1.5052861021007336e-05, + "loss": 3.0732, + "step": 5731 + }, + { + "epoch": 0.92, + "learning_rate": 1.498933110561984e-05, + "loss": 3.2471, + "step": 5732 + }, + { + "epoch": 0.92, + "learning_rate": 1.492593349630067e-05, + "loss": 3.153, + "step": 5733 + }, + { + "epoch": 0.92, + "learning_rate": 1.4862668210344154e-05, + "loss": 3.1576, + "step": 5734 + }, + { + "epoch": 0.92, + "learning_rate": 1.4799535265008435e-05, + "loss": 3.2363, + "step": 5735 + }, + { + "epoch": 0.92, + "learning_rate": 1.4736534677515512e-05, + "loss": 3.0556, + "step": 5736 + }, + { + "epoch": 0.93, + "learning_rate": 1.467366646505125e-05, + "loss": 3.1518, + "step": 5737 + }, + { + "epoch": 0.93, + "learning_rate": 1.4610930644765652e-05, + "loss": 3.0887, + "step": 5738 + }, + { + "epoch": 0.93, + "learning_rate": 1.4548327233772308e-05, + "loss": 3.0697, + "step": 5739 + }, + { + "epoch": 0.93, + "learning_rate": 1.4485856249148776e-05, + "loss": 3.0453, + "step": 5740 + }, + { + "epoch": 0.93, + "learning_rate": 1.4423517707936595e-05, + "loss": 3.1049, + "step": 5741 + }, + { + "epoch": 0.93, + "learning_rate": 1.4361311627140995e-05, + "loss": 3.2221, + "step": 5742 + }, + { + "epoch": 0.93, + "learning_rate": 1.4299238023731231e-05, + "loss": 3.0702, + "step": 5743 + }, + { + "epoch": 0.93, + "learning_rate": 1.423729691464043e-05, + "loss": 3.1126, + "step": 5744 + }, + { + "epoch": 0.93, + "learning_rate": 1.4175488316765407e-05, + "loss": 3.1977, + "step": 5745 + }, + { + "epoch": 0.93, + "learning_rate": 1.4113812246967006e-05, + "loss": 3.1111, + "step": 5746 + }, + { + "epoch": 0.93, + "learning_rate": 1.4052268722069828e-05, + "loss": 3.1497, + "step": 5747 + }, + { + "epoch": 0.93, + "learning_rate": 1.3990857758862275e-05, + "loss": 3.122, + "step": 5748 + }, + { + "epoch": 0.93, + "learning_rate": 1.3929579374096613e-05, + "loss": 3.1329, + "step": 5749 + }, + { + "epoch": 0.93, + "learning_rate": 1.3868433584489137e-05, + "loss": 3.1188, + "step": 5750 + }, + { + "epoch": 0.93, + "learning_rate": 1.3807420406719674e-05, + "loss": 3.1085, + "step": 5751 + }, + { + "epoch": 0.93, + "learning_rate": 1.3746539857432016e-05, + "loss": 3.1243, + "step": 5752 + }, + { + "epoch": 0.93, + "learning_rate": 1.3685791953233883e-05, + "loss": 3.2276, + "step": 5753 + }, + { + "epoch": 0.93, + "learning_rate": 1.3625176710696519e-05, + "loss": 3.1399, + "step": 5754 + }, + { + "epoch": 0.93, + "learning_rate": 1.356469414635525e-05, + "loss": 3.1616, + "step": 5755 + }, + { + "epoch": 0.93, + "learning_rate": 1.3504344276709214e-05, + "loss": 3.0259, + "step": 5756 + }, + { + "epoch": 0.93, + "learning_rate": 1.3444127118221128e-05, + "loss": 3.0629, + "step": 5757 + }, + { + "epoch": 0.93, + "learning_rate": 1.3384042687317632e-05, + "loss": 3.1944, + "step": 5758 + }, + { + "epoch": 0.93, + "learning_rate": 1.3324091000389171e-05, + "loss": 3.1641, + "step": 5759 + }, + { + "epoch": 0.93, + "learning_rate": 1.326427207379005e-05, + "loss": 3.0947, + "step": 5760 + }, + { + "epoch": 0.93, + "learning_rate": 1.3204585923838164e-05, + "loss": 3.0785, + "step": 5761 + }, + { + "epoch": 0.93, + "learning_rate": 1.3145032566815318e-05, + "loss": 3.0591, + "step": 5762 + }, + { + "epoch": 0.93, + "learning_rate": 1.308561201896713e-05, + "loss": 3.0596, + "step": 5763 + }, + { + "epoch": 0.93, + "learning_rate": 1.3026324296502857e-05, + "loss": 3.1481, + "step": 5764 + }, + { + "epoch": 0.93, + "learning_rate": 1.296716941559567e-05, + "loss": 3.1903, + "step": 5765 + }, + { + "epoch": 0.93, + "learning_rate": 1.2908147392382496e-05, + "loss": 3.1231, + "step": 5766 + }, + { + "epoch": 0.93, + "learning_rate": 1.2849258242963734e-05, + "loss": 3.0665, + "step": 5767 + }, + { + "epoch": 0.93, + "learning_rate": 1.2790501983403924e-05, + "loss": 3.2479, + "step": 5768 + }, + { + "epoch": 0.93, + "learning_rate": 1.2731878629731131e-05, + "loss": 3.1062, + "step": 5769 + }, + { + "epoch": 0.93, + "learning_rate": 1.2673388197937231e-05, + "loss": 3.1694, + "step": 5770 + }, + { + "epoch": 0.93, + "learning_rate": 1.2615030703977848e-05, + "loss": 3.0937, + "step": 5771 + }, + { + "epoch": 0.93, + "learning_rate": 1.2556806163772361e-05, + "loss": 3.2145, + "step": 5772 + }, + { + "epoch": 0.93, + "learning_rate": 1.2498714593203674e-05, + "loss": 3.1417, + "step": 5773 + }, + { + "epoch": 0.93, + "learning_rate": 1.2440756008118725e-05, + "loss": 2.958, + "step": 5774 + }, + { + "epoch": 0.93, + "learning_rate": 1.2382930424328087e-05, + "loss": 3.0864, + "step": 5775 + }, + { + "epoch": 0.93, + "learning_rate": 1.232523785760592e-05, + "loss": 3.1011, + "step": 5776 + }, + { + "epoch": 0.93, + "learning_rate": 1.2267678323690135e-05, + "loss": 3.1926, + "step": 5777 + }, + { + "epoch": 0.93, + "learning_rate": 1.221025183828256e-05, + "loss": 3.0757, + "step": 5778 + }, + { + "epoch": 0.93, + "learning_rate": 1.2152958417048388e-05, + "loss": 3.1547, + "step": 5779 + }, + { + "epoch": 0.93, + "learning_rate": 1.209579807561667e-05, + "loss": 3.1484, + "step": 5780 + }, + { + "epoch": 0.93, + "learning_rate": 1.2038770829580437e-05, + "loss": 3.0722, + "step": 5781 + }, + { + "epoch": 0.93, + "learning_rate": 1.1981876694495907e-05, + "loss": 3.2802, + "step": 5782 + }, + { + "epoch": 0.93, + "learning_rate": 1.1925115685883336e-05, + "loss": 3.0681, + "step": 5783 + }, + { + "epoch": 0.93, + "learning_rate": 1.186848781922656e-05, + "loss": 3.1523, + "step": 5784 + }, + { + "epoch": 0.93, + "learning_rate": 1.1811993109973051e-05, + "loss": 3.0257, + "step": 5785 + }, + { + "epoch": 0.93, + "learning_rate": 1.1755631573533987e-05, + "loss": 3.2309, + "step": 5786 + }, + { + "epoch": 0.93, + "learning_rate": 1.1699403225284289e-05, + "loss": 3.1925, + "step": 5787 + }, + { + "epoch": 0.93, + "learning_rate": 1.1643308080562465e-05, + "loss": 3.1396, + "step": 5788 + }, + { + "epoch": 0.93, + "learning_rate": 1.158734615467072e-05, + "loss": 3.01, + "step": 5789 + }, + { + "epoch": 0.93, + "learning_rate": 1.1531517462874952e-05, + "loss": 3.2535, + "step": 5790 + }, + { + "epoch": 0.93, + "learning_rate": 1.1475822020404591e-05, + "loss": 3.0723, + "step": 5791 + }, + { + "epoch": 0.93, + "learning_rate": 1.1420259842452817e-05, + "loss": 2.9622, + "step": 5792 + }, + { + "epoch": 0.93, + "learning_rate": 1.1364830944176453e-05, + "loss": 2.8851, + "step": 5793 + }, + { + "epoch": 0.93, + "learning_rate": 1.1309535340695897e-05, + "loss": 3.0864, + "step": 5794 + }, + { + "epoch": 0.93, + "learning_rate": 1.1254373047095367e-05, + "loss": 3.0258, + "step": 5795 + }, + { + "epoch": 0.93, + "learning_rate": 1.1199344078422491e-05, + "loss": 3.1879, + "step": 5796 + }, + { + "epoch": 0.93, + "learning_rate": 1.1144448449688593e-05, + "loss": 2.9528, + "step": 5797 + }, + { + "epoch": 0.93, + "learning_rate": 1.1089686175868697e-05, + "loss": 3.0667, + "step": 5798 + }, + { + "epoch": 0.94, + "learning_rate": 1.103505727190146e-05, + "loss": 2.96, + "step": 5799 + }, + { + "epoch": 0.94, + "learning_rate": 1.0980561752688967e-05, + "loss": 3.1021, + "step": 5800 + }, + { + "epoch": 0.94, + "learning_rate": 1.0926199633097156e-05, + "loss": 3.0839, + "step": 5801 + }, + { + "epoch": 0.94, + "learning_rate": 1.0871970927955498e-05, + "loss": 2.8838, + "step": 5802 + }, + { + "epoch": 0.94, + "learning_rate": 1.081787565205694e-05, + "loss": 3.2359, + "step": 5803 + }, + { + "epoch": 0.94, + "learning_rate": 1.0763913820158233e-05, + "loss": 3.0948, + "step": 5804 + }, + { + "epoch": 0.94, + "learning_rate": 1.071008544697949e-05, + "loss": 3.1211, + "step": 5805 + }, + { + "epoch": 0.94, + "learning_rate": 1.0656390547204686e-05, + "loss": 3.1752, + "step": 5806 + }, + { + "epoch": 0.94, + "learning_rate": 1.0602829135481162e-05, + "loss": 3.0305, + "step": 5807 + }, + { + "epoch": 0.94, + "learning_rate": 1.0549401226420064e-05, + "loss": 3.0043, + "step": 5808 + }, + { + "epoch": 0.94, + "learning_rate": 1.0496106834595897e-05, + "loss": 3.1328, + "step": 5809 + }, + { + "epoch": 0.94, + "learning_rate": 1.0442945974546813e-05, + "loss": 3.3263, + "step": 5810 + }, + { + "epoch": 0.94, + "learning_rate": 1.03899186607746e-05, + "loss": 3.2463, + "step": 5811 + }, + { + "epoch": 0.94, + "learning_rate": 1.0337024907744574e-05, + "loss": 2.9378, + "step": 5812 + }, + { + "epoch": 0.94, + "learning_rate": 1.028426472988564e-05, + "loss": 3.0917, + "step": 5813 + }, + { + "epoch": 0.94, + "learning_rate": 1.0231638141590227e-05, + "loss": 3.2037, + "step": 5814 + }, + { + "epoch": 0.94, + "learning_rate": 1.017914515721441e-05, + "loss": 3.1373, + "step": 5815 + }, + { + "epoch": 0.94, + "learning_rate": 1.012678579107762e-05, + "loss": 3.1917, + "step": 5816 + }, + { + "epoch": 0.94, + "learning_rate": 1.0074560057463044e-05, + "loss": 2.9772, + "step": 5817 + }, + { + "epoch": 0.94, + "learning_rate": 1.00224679706174e-05, + "loss": 3.194, + "step": 5818 + }, + { + "epoch": 0.94, + "learning_rate": 9.970509544750761e-06, + "loss": 3.0528, + "step": 5819 + }, + { + "epoch": 0.94, + "learning_rate": 9.918684794037015e-06, + "loss": 3.0101, + "step": 5820 + }, + { + "epoch": 0.94, + "learning_rate": 9.86699373261335e-06, + "loss": 3.1465, + "step": 5821 + }, + { + "epoch": 0.94, + "learning_rate": 9.815436374580544e-06, + "loss": 3.0981, + "step": 5822 + }, + { + "epoch": 0.94, + "learning_rate": 9.764012734002958e-06, + "loss": 3.1508, + "step": 5823 + }, + { + "epoch": 0.94, + "learning_rate": 9.712722824908426e-06, + "loss": 3.1847, + "step": 5824 + }, + { + "epoch": 0.94, + "learning_rate": 9.661566661288423e-06, + "loss": 3.1078, + "step": 5825 + }, + { + "epoch": 0.94, + "learning_rate": 9.610544257097731e-06, + "loss": 2.9871, + "step": 5826 + }, + { + "epoch": 0.94, + "learning_rate": 9.559655626254826e-06, + "loss": 3.2144, + "step": 5827 + }, + { + "epoch": 0.94, + "learning_rate": 9.50890078264155e-06, + "loss": 3.2244, + "step": 5828 + }, + { + "epoch": 0.94, + "learning_rate": 9.458279740103327e-06, + "loss": 3.0702, + "step": 5829 + }, + { + "epoch": 0.94, + "learning_rate": 9.407792512449109e-06, + "loss": 3.2553, + "step": 5830 + }, + { + "epoch": 0.94, + "learning_rate": 9.357439113451327e-06, + "loss": 3.1375, + "step": 5831 + }, + { + "epoch": 0.94, + "learning_rate": 9.307219556845826e-06, + "loss": 3.1523, + "step": 5832 + }, + { + "epoch": 0.94, + "learning_rate": 9.257133856332034e-06, + "loss": 3.1633, + "step": 5833 + }, + { + "epoch": 0.94, + "learning_rate": 9.207182025572802e-06, + "loss": 3.1104, + "step": 5834 + }, + { + "epoch": 0.94, + "learning_rate": 9.157364078194509e-06, + "loss": 3.063, + "step": 5835 + }, + { + "epoch": 0.94, + "learning_rate": 9.107680027787058e-06, + "loss": 3.2492, + "step": 5836 + }, + { + "epoch": 0.94, + "learning_rate": 9.05812988790361e-06, + "loss": 3.0196, + "step": 5837 + }, + { + "epoch": 0.94, + "learning_rate": 9.008713672061075e-06, + "loss": 3.2821, + "step": 5838 + }, + { + "epoch": 0.94, + "learning_rate": 8.959431393739726e-06, + "loss": 3.0367, + "step": 5839 + }, + { + "epoch": 0.94, + "learning_rate": 8.91028306638314e-06, + "loss": 3.0892, + "step": 5840 + }, + { + "epoch": 0.94, + "learning_rate": 8.861268703398596e-06, + "loss": 3.2131, + "step": 5841 + }, + { + "epoch": 0.94, + "learning_rate": 8.812388318156672e-06, + "loss": 3.2442, + "step": 5842 + }, + { + "epoch": 0.94, + "learning_rate": 8.763641923991483e-06, + "loss": 3.1646, + "step": 5843 + }, + { + "epoch": 0.94, + "learning_rate": 8.715029534200503e-06, + "loss": 3.2161, + "step": 5844 + }, + { + "epoch": 0.94, + "learning_rate": 8.666551162044845e-06, + "loss": 3.1733, + "step": 5845 + }, + { + "epoch": 0.94, + "learning_rate": 8.618206820748764e-06, + "loss": 3.0776, + "step": 5846 + }, + { + "epoch": 0.94, + "learning_rate": 8.569996523500212e-06, + "loss": 3.2024, + "step": 5847 + }, + { + "epoch": 0.94, + "learning_rate": 8.52192028345039e-06, + "loss": 3.0798, + "step": 5848 + }, + { + "epoch": 0.94, + "learning_rate": 8.473978113714143e-06, + "loss": 3.1359, + "step": 5849 + }, + { + "epoch": 0.94, + "learning_rate": 8.426170027369451e-06, + "loss": 3.1325, + "step": 5850 + }, + { + "epoch": 0.94, + "learning_rate": 8.378496037458106e-06, + "loss": 3.1476, + "step": 5851 + }, + { + "epoch": 0.94, + "learning_rate": 8.33095615698487e-06, + "loss": 3.0812, + "step": 5852 + }, + { + "epoch": 0.94, + "learning_rate": 8.28355039891826e-06, + "loss": 3.0894, + "step": 5853 + }, + { + "epoch": 0.94, + "learning_rate": 8.236278776190097e-06, + "loss": 3.0367, + "step": 5854 + }, + { + "epoch": 0.94, + "learning_rate": 8.189141301695568e-06, + "loss": 3.2506, + "step": 5855 + }, + { + "epoch": 0.94, + "learning_rate": 8.14213798829333e-06, + "loss": 3.1903, + "step": 5856 + }, + { + "epoch": 0.94, + "learning_rate": 8.095268848805404e-06, + "loss": 3.2277, + "step": 5857 + }, + { + "epoch": 0.94, + "learning_rate": 8.04853389601723e-06, + "loss": 3.1115, + "step": 5858 + }, + { + "epoch": 0.94, + "learning_rate": 8.001933142677664e-06, + "loss": 3.2206, + "step": 5859 + }, + { + "epoch": 0.94, + "learning_rate": 7.955466601498873e-06, + "loss": 3.2783, + "step": 5860 + }, + { + "epoch": 0.95, + "learning_rate": 7.90913428515655e-06, + "loss": 3.2288, + "step": 5861 + }, + { + "epoch": 0.95, + "learning_rate": 7.862936206289529e-06, + "loss": 3.1356, + "step": 5862 + }, + { + "epoch": 0.95, + "learning_rate": 7.816872377500395e-06, + "loss": 3.0367, + "step": 5863 + }, + { + "epoch": 0.95, + "learning_rate": 7.770942811354764e-06, + "loss": 3.1318, + "step": 5864 + }, + { + "epoch": 0.95, + "learning_rate": 7.725147520381781e-06, + "loss": 3.0605, + "step": 5865 + }, + { + "epoch": 0.95, + "learning_rate": 7.679486517073953e-06, + "loss": 3.27, + "step": 5866 + }, + { + "epoch": 0.95, + "learning_rate": 7.633959813887148e-06, + "loss": 3.1798, + "step": 5867 + }, + { + "epoch": 0.95, + "learning_rate": 7.588567423240544e-06, + "loss": 3.1512, + "step": 5868 + }, + { + "epoch": 0.95, + "learning_rate": 7.543309357516848e-06, + "loss": 3.0595, + "step": 5869 + }, + { + "epoch": 0.95, + "learning_rate": 7.4981856290619595e-06, + "loss": 3.087, + "step": 5870 + }, + { + "epoch": 0.95, + "learning_rate": 7.453196250185146e-06, + "loss": 3.1736, + "step": 5871 + }, + { + "epoch": 0.95, + "learning_rate": 7.408341233159088e-06, + "loss": 2.906, + "step": 5872 + }, + { + "epoch": 0.95, + "learning_rate": 7.3636205902197776e-06, + "loss": 3.0359, + "step": 5873 + }, + { + "epoch": 0.95, + "learning_rate": 7.31903433356651e-06, + "loss": 3.2488, + "step": 5874 + }, + { + "epoch": 0.95, + "learning_rate": 7.274582475362057e-06, + "loss": 3.2926, + "step": 5875 + }, + { + "epoch": 0.95, + "learning_rate": 7.230265027732441e-06, + "loss": 3.0448, + "step": 5876 + }, + { + "epoch": 0.95, + "learning_rate": 7.186082002766991e-06, + "loss": 3.1184, + "step": 5877 + }, + { + "epoch": 0.95, + "learning_rate": 7.142033412518345e-06, + "loss": 3.1051, + "step": 5878 + }, + { + "epoch": 0.95, + "learning_rate": 7.098119269002612e-06, + "loss": 3.1055, + "step": 5879 + }, + { + "epoch": 0.95, + "learning_rate": 7.0543395841989875e-06, + "loss": 3.1803, + "step": 5880 + }, + { + "epoch": 0.95, + "learning_rate": 7.010694370050308e-06, + "loss": 3.1889, + "step": 5881 + }, + { + "epoch": 0.95, + "learning_rate": 6.967183638462493e-06, + "loss": 3.1123, + "step": 5882 + }, + { + "epoch": 0.95, + "learning_rate": 6.9238074013047715e-06, + "loss": 3.0366, + "step": 5883 + }, + { + "epoch": 0.95, + "learning_rate": 6.880565670409789e-06, + "loss": 3.0802, + "step": 5884 + }, + { + "epoch": 0.95, + "learning_rate": 6.837458457573498e-06, + "loss": 3.0471, + "step": 5885 + }, + { + "epoch": 0.95, + "learning_rate": 6.794485774555048e-06, + "loss": 2.945, + "step": 5886 + }, + { + "epoch": 0.95, + "learning_rate": 6.7516476330769514e-06, + "loss": 3.3361, + "step": 5887 + }, + { + "epoch": 0.95, + "learning_rate": 6.708944044825138e-06, + "loss": 3.0841, + "step": 5888 + }, + { + "epoch": 0.95, + "learning_rate": 6.666375021448623e-06, + "loss": 3.254, + "step": 5889 + }, + { + "epoch": 0.95, + "learning_rate": 6.623940574559839e-06, + "loss": 3.2136, + "step": 5890 + }, + { + "epoch": 0.95, + "learning_rate": 6.581640715734472e-06, + "loss": 3.1139, + "step": 5891 + }, + { + "epoch": 0.95, + "learning_rate": 6.539475456511512e-06, + "loss": 3.1468, + "step": 5892 + }, + { + "epoch": 0.95, + "learning_rate": 6.497444808393149e-06, + "loss": 3.0984, + "step": 5893 + }, + { + "epoch": 0.95, + "learning_rate": 6.4555487828450445e-06, + "loss": 3.0068, + "step": 5894 + }, + { + "epoch": 0.95, + "learning_rate": 6.413787391295944e-06, + "loss": 2.9969, + "step": 5895 + }, + { + "epoch": 0.95, + "learning_rate": 6.372160645137903e-06, + "loss": 3.12, + "step": 5896 + }, + { + "epoch": 0.95, + "learning_rate": 6.330668555726393e-06, + "loss": 2.9608, + "step": 5897 + }, + { + "epoch": 0.95, + "learning_rate": 6.289311134380027e-06, + "loss": 2.9508, + "step": 5898 + }, + { + "epoch": 0.95, + "learning_rate": 6.248088392380502e-06, + "loss": 3.1603, + "step": 5899 + }, + { + "epoch": 0.95, + "learning_rate": 6.207000340973268e-06, + "loss": 3.1351, + "step": 5900 + }, + { + "epoch": 0.95, + "learning_rate": 6.166046991366525e-06, + "loss": 3.1398, + "step": 5901 + }, + { + "epoch": 0.95, + "learning_rate": 6.1252283547320046e-06, + "loss": 3.0199, + "step": 5902 + }, + { + "epoch": 0.95, + "learning_rate": 6.084544442204576e-06, + "loss": 3.1157, + "step": 5903 + }, + { + "epoch": 0.95, + "learning_rate": 6.043995264882529e-06, + "loss": 3.1596, + "step": 5904 + }, + { + "epoch": 0.95, + "learning_rate": 6.003580833827127e-06, + "loss": 3.1201, + "step": 5905 + }, + { + "epoch": 0.95, + "learning_rate": 5.963301160063106e-06, + "loss": 3.2139, + "step": 5906 + }, + { + "epoch": 0.95, + "learning_rate": 5.923156254578343e-06, + "loss": 3.0825, + "step": 5907 + }, + { + "epoch": 0.95, + "learning_rate": 5.883146128323913e-06, + "loss": 3.23, + "step": 5908 + }, + { + "epoch": 0.95, + "learning_rate": 5.843270792214306e-06, + "loss": 3.1292, + "step": 5909 + }, + { + "epoch": 0.95, + "learning_rate": 5.8035302571270435e-06, + "loss": 3.1517, + "step": 5910 + }, + { + "epoch": 0.95, + "learning_rate": 5.763924533902898e-06, + "loss": 3.1377, + "step": 5911 + }, + { + "epoch": 0.95, + "learning_rate": 5.724453633345949e-06, + "loss": 3.0958, + "step": 5912 + }, + { + "epoch": 0.95, + "learning_rate": 5.68511756622353e-06, + "loss": 3.1083, + "step": 5913 + }, + { + "epoch": 0.95, + "learning_rate": 5.645916343266056e-06, + "loss": 3.1886, + "step": 5914 + }, + { + "epoch": 0.95, + "learning_rate": 5.606849975167194e-06, + "loss": 3.0474, + "step": 5915 + }, + { + "epoch": 0.95, + "learning_rate": 5.567918472584032e-06, + "loss": 3.2674, + "step": 5916 + }, + { + "epoch": 0.95, + "learning_rate": 5.529121846136465e-06, + "loss": 3.2918, + "step": 5917 + }, + { + "epoch": 0.95, + "learning_rate": 5.4904601064079126e-06, + "loss": 3.0303, + "step": 5918 + }, + { + "epoch": 0.95, + "learning_rate": 5.451933263945053e-06, + "loss": 3.1391, + "step": 5919 + }, + { + "epoch": 0.95, + "learning_rate": 5.413541329257421e-06, + "loss": 3.2609, + "step": 5920 + }, + { + "epoch": 0.95, + "learning_rate": 5.3752843128180855e-06, + "loss": 3.1735, + "step": 5921 + }, + { + "epoch": 0.95, + "learning_rate": 5.337162225063141e-06, + "loss": 3.1581, + "step": 5922 + }, + { + "epoch": 0.96, + "learning_rate": 5.299175076391827e-06, + "loss": 3.1932, + "step": 5923 + }, + { + "epoch": 0.96, + "learning_rate": 5.261322877166741e-06, + "loss": 3.148, + "step": 5924 + }, + { + "epoch": 0.96, + "learning_rate": 5.22360563771368e-06, + "loss": 3.0843, + "step": 5925 + }, + { + "epoch": 0.96, + "learning_rate": 5.1860233683213576e-06, + "loss": 3.1876, + "step": 5926 + }, + { + "epoch": 0.96, + "learning_rate": 5.148576079241907e-06, + "loss": 3.1561, + "step": 5927 + }, + { + "epoch": 0.96, + "learning_rate": 5.1112637806906e-06, + "loss": 3.2201, + "step": 5928 + }, + { + "epoch": 0.96, + "learning_rate": 5.074086482845852e-06, + "loss": 3.1928, + "step": 5929 + }, + { + "epoch": 0.96, + "learning_rate": 5.037044195849216e-06, + "loss": 3.2039, + "step": 5930 + }, + { + "epoch": 0.96, + "learning_rate": 5.000136929805443e-06, + "loss": 3.1518, + "step": 5931 + }, + { + "epoch": 0.96, + "learning_rate": 4.9633646947825905e-06, + "loss": 3.1451, + "step": 5932 + }, + { + "epoch": 0.96, + "learning_rate": 4.926727500811634e-06, + "loss": 2.879, + "step": 5933 + }, + { + "epoch": 0.96, + "learning_rate": 4.890225357886913e-06, + "loss": 3.1746, + "step": 5934 + }, + { + "epoch": 0.96, + "learning_rate": 4.853858275965794e-06, + "loss": 3.0657, + "step": 5935 + }, + { + "epoch": 0.96, + "learning_rate": 4.8176262649689e-06, + "loss": 3.1246, + "step": 5936 + }, + { + "epoch": 0.96, + "learning_rate": 4.781529334779877e-06, + "loss": 3.0573, + "step": 5937 + }, + { + "epoch": 0.96, + "learning_rate": 4.74556749524574e-06, + "loss": 3.2034, + "step": 5938 + }, + { + "epoch": 0.96, + "learning_rate": 4.709740756176362e-06, + "loss": 3.2719, + "step": 5939 + }, + { + "epoch": 0.96, + "learning_rate": 4.674049127345093e-06, + "loss": 3.0877, + "step": 5940 + }, + { + "epoch": 0.96, + "learning_rate": 4.638492618488088e-06, + "loss": 2.9772, + "step": 5941 + }, + { + "epoch": 0.96, + "learning_rate": 4.603071239304924e-06, + "loss": 3.247, + "step": 5942 + }, + { + "epoch": 0.96, + "learning_rate": 4.567784999458147e-06, + "loss": 3.017, + "step": 5943 + }, + { + "epoch": 0.96, + "learning_rate": 4.5326339085735025e-06, + "loss": 3.204, + "step": 5944 + }, + { + "epoch": 0.96, + "learning_rate": 4.49761797623982e-06, + "loss": 3.1675, + "step": 5945 + }, + { + "epoch": 0.96, + "learning_rate": 4.462737212009182e-06, + "loss": 3.1458, + "step": 5946 + }, + { + "epoch": 0.96, + "learning_rate": 4.427991625396644e-06, + "loss": 3.0829, + "step": 5947 + }, + { + "epoch": 0.96, + "learning_rate": 4.39338122588051e-06, + "loss": 3.2146, + "step": 5948 + }, + { + "epoch": 0.96, + "learning_rate": 4.358906022902065e-06, + "loss": 3.1524, + "step": 5949 + }, + { + "epoch": 0.96, + "learning_rate": 4.324566025865895e-06, + "loss": 3.144, + "step": 5950 + }, + { + "epoch": 0.96, + "learning_rate": 4.290361244139506e-06, + "loss": 3.1683, + "step": 5951 + }, + { + "epoch": 0.96, + "learning_rate": 4.256291687053715e-06, + "loss": 3.2288, + "step": 5952 + }, + { + "epoch": 0.96, + "learning_rate": 4.222357363902363e-06, + "loss": 3.2062, + "step": 5953 + }, + { + "epoch": 0.96, + "learning_rate": 4.188558283942323e-06, + "loss": 3.0767, + "step": 5954 + }, + { + "epoch": 0.96, + "learning_rate": 4.154894456393665e-06, + "loss": 3.1157, + "step": 5955 + }, + { + "epoch": 0.96, + "learning_rate": 4.121365890439544e-06, + "loss": 3.1962, + "step": 5956 + }, + { + "epoch": 0.96, + "learning_rate": 4.087972595226252e-06, + "loss": 3.3033, + "step": 5957 + }, + { + "epoch": 0.96, + "learning_rate": 4.054714579863117e-06, + "loss": 3.1311, + "step": 5958 + }, + { + "epoch": 0.96, + "learning_rate": 4.021591853422601e-06, + "loss": 3.0704, + "step": 5959 + }, + { + "epoch": 0.96, + "learning_rate": 3.988604424940257e-06, + "loss": 3.2803, + "step": 5960 + }, + { + "epoch": 0.96, + "learning_rate": 3.955752303414717e-06, + "loss": 3.1347, + "step": 5961 + }, + { + "epoch": 0.96, + "learning_rate": 3.9230354978077584e-06, + "loss": 3.1817, + "step": 5962 + }, + { + "epoch": 0.96, + "learning_rate": 3.890454017044076e-06, + "loss": 3.012, + "step": 5963 + }, + { + "epoch": 0.96, + "learning_rate": 3.8580078700117264e-06, + "loss": 2.9873, + "step": 5964 + }, + { + "epoch": 0.96, + "learning_rate": 3.825697065561629e-06, + "loss": 3.2002, + "step": 5965 + }, + { + "epoch": 0.96, + "learning_rate": 3.7935216125078443e-06, + "loss": 3.1818, + "step": 5966 + }, + { + "epoch": 0.96, + "learning_rate": 3.7614815196274633e-06, + "loss": 3.127, + "step": 5967 + }, + { + "epoch": 0.96, + "learning_rate": 3.7295767956607717e-06, + "loss": 3.1516, + "step": 5968 + }, + { + "epoch": 0.96, + "learning_rate": 3.6978074493110303e-06, + "loss": 3.1372, + "step": 5969 + }, + { + "epoch": 0.96, + "learning_rate": 3.6661734892446396e-06, + "loss": 3.1127, + "step": 5970 + }, + { + "epoch": 0.96, + "learning_rate": 3.6346749240910304e-06, + "loss": 2.9377, + "step": 5971 + }, + { + "epoch": 0.96, + "learning_rate": 3.603311762442607e-06, + "loss": 3.2213, + "step": 5972 + }, + { + "epoch": 0.96, + "learning_rate": 3.5720840128550256e-06, + "loss": 3.1827, + "step": 5973 + }, + { + "epoch": 0.96, + "learning_rate": 3.540991683846806e-06, + "loss": 3.0245, + "step": 5974 + }, + { + "epoch": 0.96, + "learning_rate": 3.5100347838997183e-06, + "loss": 3.1961, + "step": 5975 + }, + { + "epoch": 0.96, + "learning_rate": 3.4792133214585074e-06, + "loss": 3.1742, + "step": 5976 + }, + { + "epoch": 0.96, + "learning_rate": 3.448527304930893e-06, + "loss": 3.0325, + "step": 5977 + }, + { + "epoch": 0.96, + "learning_rate": 3.4179767426876784e-06, + "loss": 3.1653, + "step": 5978 + }, + { + "epoch": 0.96, + "learning_rate": 3.387561643062864e-06, + "loss": 3.1377, + "step": 5979 + }, + { + "epoch": 0.96, + "learning_rate": 3.3572820143533133e-06, + "loss": 3.1327, + "step": 5980 + }, + { + "epoch": 0.96, + "learning_rate": 3.3271378648190296e-06, + "loss": 3.0636, + "step": 5981 + }, + { + "epoch": 0.96, + "learning_rate": 3.2971292026829915e-06, + "loss": 3.1783, + "step": 5982 + }, + { + "epoch": 0.96, + "learning_rate": 3.267256036131372e-06, + "loss": 3.3819, + "step": 5983 + }, + { + "epoch": 0.96, + "learning_rate": 3.237518373313153e-06, + "loss": 3.1377, + "step": 5984 + }, + { + "epoch": 0.97, + "learning_rate": 3.2079162223405123e-06, + "loss": 3.2663, + "step": 5985 + }, + { + "epoch": 0.97, + "learning_rate": 3.178449591288657e-06, + "loss": 3.12, + "step": 5986 + }, + { + "epoch": 0.97, + "learning_rate": 3.149118488195768e-06, + "loss": 3.1093, + "step": 5987 + }, + { + "epoch": 0.97, + "learning_rate": 3.1199229210630565e-06, + "loss": 3.1641, + "step": 5988 + }, + { + "epoch": 0.97, + "learning_rate": 3.090862897854818e-06, + "loss": 3.2216, + "step": 5989 + }, + { + "epoch": 0.97, + "learning_rate": 3.061938426498323e-06, + "loss": 2.9737, + "step": 5990 + }, + { + "epoch": 0.97, + "learning_rate": 3.033149514883815e-06, + "loss": 3.285, + "step": 5991 + }, + { + "epoch": 0.97, + "learning_rate": 3.0044961708647343e-06, + "loss": 2.9358, + "step": 5992 + }, + { + "epoch": 0.97, + "learning_rate": 2.975978402257329e-06, + "loss": 3.2881, + "step": 5993 + }, + { + "epoch": 0.97, + "learning_rate": 2.947596216841042e-06, + "loss": 3.2222, + "step": 5994 + }, + { + "epoch": 0.97, + "learning_rate": 2.9193496223581805e-06, + "loss": 3.1006, + "step": 5995 + }, + { + "epoch": 0.97, + "learning_rate": 2.8912386265141923e-06, + "loss": 2.9476, + "step": 5996 + }, + { + "epoch": 0.97, + "learning_rate": 2.8632632369774424e-06, + "loss": 3.0084, + "step": 5997 + }, + { + "epoch": 0.97, + "learning_rate": 2.8354234613793275e-06, + "loss": 3.2239, + "step": 5998 + }, + { + "epoch": 0.97, + "learning_rate": 2.8077193073142713e-06, + "loss": 3.1365, + "step": 5999 + }, + { + "epoch": 0.97, + "learning_rate": 2.7801507823397297e-06, + "loss": 3.1141, + "step": 6000 + }, + { + "epoch": 0.97, + "learning_rate": 2.7527178939760754e-06, + "loss": 3.1217, + "step": 6001 + }, + { + "epoch": 0.97, + "learning_rate": 2.7254206497067114e-06, + "loss": 3.0611, + "step": 6002 + }, + { + "epoch": 0.97, + "learning_rate": 2.698259056978125e-06, + "loss": 3.2002, + "step": 6003 + }, + { + "epoch": 0.97, + "learning_rate": 2.671233123199668e-06, + "loss": 3.0797, + "step": 6004 + }, + { + "epoch": 0.97, + "learning_rate": 2.6443428557437753e-06, + "loss": 3.0632, + "step": 6005 + }, + { + "epoch": 0.97, + "learning_rate": 2.617588261945747e-06, + "loss": 3.1875, + "step": 6006 + }, + { + "epoch": 0.97, + "learning_rate": 2.590969349104133e-06, + "loss": 3.2222, + "step": 6007 + }, + { + "epoch": 0.97, + "learning_rate": 2.5644861244802366e-06, + "loss": 3.2405, + "step": 6008 + }, + { + "epoch": 0.97, + "learning_rate": 2.5381385952983337e-06, + "loss": 3.0538, + "step": 6009 + }, + { + "epoch": 0.97, + "learning_rate": 2.511926768745898e-06, + "loss": 3.045, + "step": 6010 + }, + { + "epoch": 0.97, + "learning_rate": 2.4858506519732095e-06, + "loss": 3.2631, + "step": 6011 + }, + { + "epoch": 0.97, + "learning_rate": 2.459910252093467e-06, + "loss": 3.1434, + "step": 6012 + }, + { + "epoch": 0.97, + "learning_rate": 2.434105576183121e-06, + "loss": 3.1258, + "step": 6013 + }, + { + "epoch": 0.97, + "learning_rate": 2.408436631281319e-06, + "loss": 3.2163, + "step": 6014 + }, + { + "epoch": 0.97, + "learning_rate": 2.382903424390348e-06, + "loss": 3.1349, + "step": 6015 + }, + { + "epoch": 0.97, + "learning_rate": 2.3575059624754147e-06, + "loss": 3.0948, + "step": 6016 + }, + { + "epoch": 0.97, + "learning_rate": 2.3322442524646436e-06, + "loss": 3.1551, + "step": 6017 + }, + { + "epoch": 0.97, + "learning_rate": 2.3071183012491337e-06, + "loss": 3.1125, + "step": 6018 + }, + { + "epoch": 0.97, + "learning_rate": 2.282128115683124e-06, + "loss": 3.1164, + "step": 6019 + }, + { + "epoch": 0.97, + "learning_rate": 2.257273702583662e-06, + "loss": 3.0705, + "step": 6020 + }, + { + "epoch": 0.97, + "learning_rate": 2.232555068730713e-06, + "loss": 3.3095, + "step": 6021 + }, + { + "epoch": 0.97, + "learning_rate": 2.2079722208672714e-06, + "loss": 3.1459, + "step": 6022 + }, + { + "epoch": 0.97, + "learning_rate": 2.183525165699418e-06, + "loss": 3.0908, + "step": 6023 + }, + { + "epoch": 0.97, + "learning_rate": 2.159213909895874e-06, + "loss": 3.0842, + "step": 6024 + }, + { + "epoch": 0.97, + "learning_rate": 2.1350384600886675e-06, + "loss": 3.1292, + "step": 6025 + }, + { + "epoch": 0.97, + "learning_rate": 2.1109988228725786e-06, + "loss": 3.1083, + "step": 6026 + }, + { + "epoch": 0.97, + "learning_rate": 2.087095004805306e-06, + "loss": 3.0559, + "step": 6027 + }, + { + "epoch": 0.97, + "learning_rate": 2.063327012407634e-06, + "loss": 3.1821, + "step": 6028 + }, + { + "epoch": 0.97, + "learning_rate": 2.0396948521632653e-06, + "loss": 3.1748, + "step": 6029 + }, + { + "epoch": 0.97, + "learning_rate": 2.0161985305187647e-06, + "loss": 3.1428, + "step": 6030 + }, + { + "epoch": 0.97, + "learning_rate": 1.9928380538837278e-06, + "loss": 3.141, + "step": 6031 + }, + { + "epoch": 0.97, + "learning_rate": 1.969613428630668e-06, + "loss": 3.0225, + "step": 6032 + }, + { + "epoch": 0.97, + "learning_rate": 1.9465246610949616e-06, + "loss": 3.2515, + "step": 6033 + }, + { + "epoch": 0.97, + "learning_rate": 1.9235717575750155e-06, + "loss": 3.2018, + "step": 6034 + }, + { + "epoch": 0.97, + "learning_rate": 1.9007547243322099e-06, + "loss": 3.0552, + "step": 6035 + }, + { + "epoch": 0.97, + "learning_rate": 1.878073567590788e-06, + "loss": 3.0947, + "step": 6036 + }, + { + "epoch": 0.97, + "learning_rate": 1.8555282935378559e-06, + "loss": 3.2657, + "step": 6037 + }, + { + "epoch": 0.97, + "learning_rate": 1.8331189083237166e-06, + "loss": 3.0422, + "step": 6038 + }, + { + "epoch": 0.97, + "learning_rate": 1.8108454180612577e-06, + "loss": 3.1003, + "step": 6039 + }, + { + "epoch": 0.97, + "learning_rate": 1.788707828826508e-06, + "loss": 2.9106, + "step": 6040 + }, + { + "epoch": 0.97, + "learning_rate": 1.7667061466584144e-06, + "loss": 3.107, + "step": 6041 + }, + { + "epoch": 0.97, + "learning_rate": 1.7448403775587873e-06, + "loss": 3.0328, + "step": 6042 + }, + { + "epoch": 0.97, + "learning_rate": 1.7231105274924108e-06, + "loss": 3.1148, + "step": 6043 + }, + { + "epoch": 0.97, + "learning_rate": 1.7015166023869878e-06, + "loss": 2.977, + "step": 6044 + }, + { + "epoch": 0.97, + "learning_rate": 1.6800586081330282e-06, + "loss": 3.2733, + "step": 6045 + }, + { + "epoch": 0.97, + "learning_rate": 1.6587365505841833e-06, + "loss": 3.2722, + "step": 6046 + }, + { + "epoch": 0.98, + "learning_rate": 1.637550435556856e-06, + "loss": 2.9988, + "step": 6047 + }, + { + "epoch": 0.98, + "learning_rate": 1.616500268830423e-06, + "loss": 3.1488, + "step": 6048 + }, + { + "epoch": 0.98, + "learning_rate": 1.5955860561470691e-06, + "loss": 3.2635, + "step": 6049 + }, + { + "epoch": 0.98, + "learning_rate": 1.5748078032120638e-06, + "loss": 3.0553, + "step": 6050 + }, + { + "epoch": 0.98, + "learning_rate": 1.5541655156934841e-06, + "loss": 3.2884, + "step": 6051 + }, + { + "epoch": 0.98, + "learning_rate": 1.5336591992223814e-06, + "loss": 3.1941, + "step": 6052 + }, + { + "epoch": 0.98, + "learning_rate": 1.5132888593925586e-06, + "loss": 3.0921, + "step": 6053 + }, + { + "epoch": 0.98, + "learning_rate": 1.4930545017610152e-06, + "loss": 3.0898, + "step": 6054 + }, + { + "epoch": 0.98, + "learning_rate": 1.472956131847336e-06, + "loss": 3.0606, + "step": 6055 + }, + { + "epoch": 0.98, + "learning_rate": 1.452993755134191e-06, + "loss": 3.0345, + "step": 6056 + }, + { + "epoch": 0.98, + "learning_rate": 1.4331673770671683e-06, + "loss": 3.134, + "step": 6057 + }, + { + "epoch": 0.98, + "learning_rate": 1.413477003054664e-06, + "loss": 3.2434, + "step": 6058 + }, + { + "epoch": 0.98, + "learning_rate": 1.393922638468048e-06, + "loss": 2.9231, + "step": 6059 + }, + { + "epoch": 0.98, + "learning_rate": 1.3745042886414983e-06, + "loss": 3.0694, + "step": 6060 + }, + { + "epoch": 0.98, + "learning_rate": 1.3552219588721659e-06, + "loss": 3.147, + "step": 6061 + }, + { + "epoch": 0.98, + "learning_rate": 1.3360756544201213e-06, + "loss": 3.1625, + "step": 6062 + }, + { + "epoch": 0.98, + "learning_rate": 1.3170653805082423e-06, + "loss": 3.0558, + "step": 6063 + }, + { + "epoch": 0.98, + "learning_rate": 1.2981911423223248e-06, + "loss": 3.1825, + "step": 6064 + }, + { + "epoch": 0.98, + "learning_rate": 1.2794529450111392e-06, + "loss": 3.0919, + "step": 6065 + }, + { + "epoch": 0.98, + "learning_rate": 1.2608507936862635e-06, + "loss": 3.0549, + "step": 6066 + }, + { + "epoch": 0.98, + "learning_rate": 1.2423846934220829e-06, + "loss": 3.1738, + "step": 6067 + }, + { + "epoch": 0.98, + "learning_rate": 1.2240546492560678e-06, + "loss": 3.1592, + "step": 6068 + }, + { + "epoch": 0.98, + "learning_rate": 1.2058606661884964e-06, + "loss": 3.1206, + "step": 6069 + }, + { + "epoch": 0.98, + "learning_rate": 1.1878027491823984e-06, + "loss": 3.2168, + "step": 6070 + }, + { + "epoch": 0.98, + "learning_rate": 1.1698809031638336e-06, + "loss": 3.1052, + "step": 6071 + }, + { + "epoch": 0.98, + "learning_rate": 1.15209513302178e-06, + "loss": 3.0692, + "step": 6072 + }, + { + "epoch": 0.98, + "learning_rate": 1.134445443607912e-06, + "loss": 3.0483, + "step": 6073 + }, + { + "epoch": 0.98, + "learning_rate": 1.1169318397369344e-06, + "loss": 3.0828, + "step": 6074 + }, + { + "epoch": 0.98, + "learning_rate": 1.0995543261863584e-06, + "loss": 3.2017, + "step": 6075 + }, + { + "epoch": 0.98, + "learning_rate": 1.0823129076966142e-06, + "loss": 3.2117, + "step": 6076 + }, + { + "epoch": 0.98, + "learning_rate": 1.065207588971051e-06, + "loss": 3.0492, + "step": 6077 + }, + { + "epoch": 0.98, + "learning_rate": 1.048238374675714e-06, + "loss": 3.2024, + "step": 6078 + }, + { + "epoch": 0.98, + "learning_rate": 1.0314052694397335e-06, + "loss": 3.0089, + "step": 6079 + }, + { + "epoch": 0.98, + "learning_rate": 1.0147082778549367e-06, + "loss": 3.1902, + "step": 6080 + }, + { + "epoch": 0.98, + "learning_rate": 9.98147404476124e-07, + "loss": 3.2471, + "step": 6081 + }, + { + "epoch": 0.98, + "learning_rate": 9.8172265382096e-07, + "loss": 3.2112, + "step": 6082 + }, + { + "epoch": 0.98, + "learning_rate": 9.654340303699161e-07, + "loss": 3.1345, + "step": 6083 + }, + { + "epoch": 0.98, + "learning_rate": 9.492815385664377e-07, + "loss": 3.1793, + "step": 6084 + }, + { + "epoch": 0.98, + "learning_rate": 9.332651828166672e-07, + "loss": 2.969, + "step": 6085 + }, + { + "epoch": 0.98, + "learning_rate": 9.173849674897206e-07, + "loss": 3.2883, + "step": 6086 + }, + { + "epoch": 0.98, + "learning_rate": 9.016408969176326e-07, + "loss": 3.2068, + "step": 6087 + }, + { + "epoch": 0.98, + "learning_rate": 8.860329753951901e-07, + "loss": 3.2056, + "step": 6088 + }, + { + "epoch": 0.98, + "learning_rate": 8.705612071800429e-07, + "loss": 2.9873, + "step": 6089 + }, + { + "epoch": 0.98, + "learning_rate": 8.552255964928146e-07, + "loss": 3.0509, + "step": 6090 + }, + { + "epoch": 0.98, + "learning_rate": 8.400261475168258e-07, + "loss": 3.2052, + "step": 6091 + }, + { + "epoch": 0.98, + "learning_rate": 8.249628643983708e-07, + "loss": 3.1778, + "step": 6092 + }, + { + "epoch": 0.98, + "learning_rate": 8.100357512466072e-07, + "loss": 2.9292, + "step": 6093 + }, + { + "epoch": 0.98, + "learning_rate": 7.952448121333888e-07, + "loss": 3.1732, + "step": 6094 + }, + { + "epoch": 0.98, + "learning_rate": 7.805900510936547e-07, + "loss": 2.97, + "step": 6095 + }, + { + "epoch": 0.98, + "learning_rate": 7.66071472124985e-07, + "loss": 2.967, + "step": 6096 + }, + { + "epoch": 0.98, + "learning_rate": 7.516890791879339e-07, + "loss": 3.1864, + "step": 6097 + }, + { + "epoch": 0.98, + "learning_rate": 7.374428762059182e-07, + "loss": 3.1148, + "step": 6098 + }, + { + "epoch": 0.98, + "learning_rate": 7.233328670651073e-07, + "loss": 3.08, + "step": 6099 + }, + { + "epoch": 0.98, + "learning_rate": 7.093590556145891e-07, + "loss": 3.1764, + "step": 6100 + }, + { + "epoch": 0.98, + "learning_rate": 6.955214456663139e-07, + "loss": 2.9644, + "step": 6101 + }, + { + "epoch": 0.98, + "learning_rate": 6.818200409949849e-07, + "loss": 3.0514, + "step": 6102 + }, + { + "epoch": 0.98, + "learning_rate": 6.682548453382231e-07, + "loss": 3.2485, + "step": 6103 + }, + { + "epoch": 0.98, + "learning_rate": 6.548258623965131e-07, + "loss": 3.1937, + "step": 6104 + }, + { + "epoch": 0.98, + "learning_rate": 6.415330958330912e-07, + "loss": 3.1076, + "step": 6105 + }, + { + "epoch": 0.98, + "learning_rate": 6.283765492741678e-07, + "loss": 2.9434, + "step": 6106 + }, + { + "epoch": 0.98, + "learning_rate": 6.153562263086498e-07, + "loss": 3.0277, + "step": 6107 + }, + { + "epoch": 0.98, + "learning_rate": 6.02472130488363e-07, + "loss": 3.2009, + "step": 6108 + }, + { + "epoch": 0.99, + "learning_rate": 5.897242653279955e-07, + "loss": 3.1393, + "step": 6109 + }, + { + "epoch": 0.99, + "learning_rate": 5.771126343049881e-07, + "loss": 3.0085, + "step": 6110 + }, + { + "epoch": 0.99, + "learning_rate": 5.646372408597001e-07, + "loss": 3.1107, + "step": 6111 + }, + { + "epoch": 0.99, + "learning_rate": 5.522980883952422e-07, + "loss": 3.2047, + "step": 6112 + }, + { + "epoch": 0.99, + "learning_rate": 5.400951802777553e-07, + "loss": 3.2768, + "step": 6113 + }, + { + "epoch": 0.99, + "learning_rate": 5.280285198359103e-07, + "loss": 3.1898, + "step": 6114 + }, + { + "epoch": 0.99, + "learning_rate": 5.160981103614626e-07, + "loss": 3.0722, + "step": 6115 + }, + { + "epoch": 0.99, + "learning_rate": 5.043039551088646e-07, + "loss": 3.2404, + "step": 6116 + }, + { + "epoch": 0.99, + "learning_rate": 4.926460572954317e-07, + "loss": 3.098, + "step": 6117 + }, + { + "epoch": 0.99, + "learning_rate": 4.811244201013976e-07, + "loss": 3.1392, + "step": 6118 + }, + { + "epoch": 0.99, + "learning_rate": 4.697390466696372e-07, + "loss": 3.0957, + "step": 6119 + }, + { + "epoch": 0.99, + "learning_rate": 4.5848994010611043e-07, + "loss": 3.2409, + "step": 6120 + }, + { + "epoch": 0.99, + "learning_rate": 4.473771034793628e-07, + "loss": 3.1426, + "step": 6121 + }, + { + "epoch": 0.99, + "learning_rate": 4.3640053982085816e-07, + "loss": 3.0202, + "step": 6122 + }, + { + "epoch": 0.99, + "learning_rate": 4.2556025212492356e-07, + "loss": 3.0489, + "step": 6123 + }, + { + "epoch": 0.99, + "learning_rate": 4.1485624334869353e-07, + "loss": 3.0799, + "step": 6124 + }, + { + "epoch": 0.99, + "learning_rate": 4.0428851641211016e-07, + "loss": 2.7815, + "step": 6125 + }, + { + "epoch": 0.99, + "learning_rate": 3.9385707419792305e-07, + "loss": 3.1315, + "step": 6126 + }, + { + "epoch": 0.99, + "learning_rate": 3.835619195517448e-07, + "loss": 3.1249, + "step": 6127 + }, + { + "epoch": 0.99, + "learning_rate": 3.7340305528194006e-07, + "loss": 3.0916, + "step": 6128 + }, + { + "epoch": 0.99, + "learning_rate": 3.633804841598476e-07, + "loss": 2.9497, + "step": 6129 + }, + { + "epoch": 0.99, + "learning_rate": 3.5349420891939157e-07, + "loss": 2.9506, + "step": 6130 + }, + { + "epoch": 0.99, + "learning_rate": 3.437442322575812e-07, + "loss": 3.2029, + "step": 6131 + }, + { + "epoch": 0.99, + "learning_rate": 3.341305568340114e-07, + "loss": 3.1657, + "step": 6132 + }, + { + "epoch": 0.99, + "learning_rate": 3.246531852713064e-07, + "loss": 2.9914, + "step": 6133 + }, + { + "epoch": 0.99, + "learning_rate": 3.1531212015467605e-07, + "loss": 3.1542, + "step": 6134 + }, + { + "epoch": 0.99, + "learning_rate": 3.0610736403235973e-07, + "loss": 3.2101, + "step": 6135 + }, + { + "epoch": 0.99, + "learning_rate": 2.9703891941523785e-07, + "loss": 3.1066, + "step": 6136 + }, + { + "epoch": 0.99, + "learning_rate": 2.881067887771649e-07, + "loss": 3.2224, + "step": 6137 + }, + { + "epoch": 0.99, + "learning_rate": 2.793109745547473e-07, + "loss": 3.1484, + "step": 6138 + }, + { + "epoch": 0.99, + "learning_rate": 2.706514791473436e-07, + "loss": 3.1242, + "step": 6139 + }, + { + "epoch": 0.99, + "learning_rate": 2.621283049171752e-07, + "loss": 3.1083, + "step": 6140 + }, + { + "epoch": 0.99, + "learning_rate": 2.537414541893268e-07, + "loss": 3.2423, + "step": 6141 + }, + { + "epoch": 0.99, + "learning_rate": 2.4549092925157947e-07, + "loss": 3.1174, + "step": 6142 + }, + { + "epoch": 0.99, + "learning_rate": 2.3737673235468827e-07, + "loss": 3.1476, + "step": 6143 + }, + { + "epoch": 0.99, + "learning_rate": 2.2939886571204937e-07, + "loss": 3.0187, + "step": 6144 + }, + { + "epoch": 0.99, + "learning_rate": 2.2155733149992196e-07, + "loss": 3.0417, + "step": 6145 + }, + { + "epoch": 0.99, + "learning_rate": 2.1385213185748375e-07, + "loss": 3.1507, + "step": 6146 + }, + { + "epoch": 0.99, + "learning_rate": 2.06283268886609e-07, + "loss": 3.163, + "step": 6147 + }, + { + "epoch": 0.99, + "learning_rate": 1.9885074465197937e-07, + "loss": 3.0262, + "step": 6148 + }, + { + "epoch": 0.99, + "learning_rate": 1.915545611811398e-07, + "loss": 3.0834, + "step": 6149 + }, + { + "epoch": 0.99, + "learning_rate": 1.843947204644425e-07, + "loss": 3.0183, + "step": 6150 + }, + { + "epoch": 0.99, + "learning_rate": 1.7737122445493637e-07, + "loss": 3.1082, + "step": 6151 + }, + { + "epoch": 0.99, + "learning_rate": 1.7048407506858877e-07, + "loss": 3.1886, + "step": 6152 + }, + { + "epoch": 0.99, + "learning_rate": 1.6373327418423013e-07, + "loss": 3.2377, + "step": 6153 + }, + { + "epoch": 0.99, + "learning_rate": 1.5711882364327635e-07, + "loss": 3.0895, + "step": 6154 + }, + { + "epoch": 0.99, + "learning_rate": 1.5064072525017292e-07, + "loss": 3.2126, + "step": 6155 + }, + { + "epoch": 0.99, + "learning_rate": 1.4429898077211735e-07, + "loss": 3.144, + "step": 6156 + }, + { + "epoch": 0.99, + "learning_rate": 1.380935919389481e-07, + "loss": 3.1136, + "step": 6157 + }, + { + "epoch": 0.99, + "learning_rate": 1.3202456044353328e-07, + "loss": 3.0606, + "step": 6158 + }, + { + "epoch": 0.99, + "learning_rate": 1.2609188794143744e-07, + "loss": 3.1164, + "step": 6159 + }, + { + "epoch": 0.99, + "learning_rate": 1.2029557605097717e-07, + "loss": 3.0261, + "step": 6160 + }, + { + "epoch": 0.99, + "learning_rate": 1.1463562635333213e-07, + "loss": 3.1631, + "step": 6161 + }, + { + "epoch": 0.99, + "learning_rate": 1.0911204039254497e-07, + "loss": 3.1024, + "step": 6162 + }, + { + "epoch": 0.99, + "learning_rate": 1.0372481967541036e-07, + "loss": 3.0692, + "step": 6163 + }, + { + "epoch": 0.99, + "learning_rate": 9.847396567141953e-08, + "loss": 3.0852, + "step": 6164 + }, + { + "epoch": 0.99, + "learning_rate": 9.335947981298221e-08, + "loss": 3.1154, + "step": 6165 + }, + { + "epoch": 0.99, + "learning_rate": 8.838136349526016e-08, + "loss": 3.1759, + "step": 6166 + }, + { + "epoch": 0.99, + "learning_rate": 8.353961807627819e-08, + "loss": 3.1643, + "step": 6167 + }, + { + "epoch": 0.99, + "learning_rate": 7.883424487681312e-08, + "loss": 3.1862, + "step": 6168 + }, + { + "epoch": 0.99, + "learning_rate": 7.42652451804493e-08, + "loss": 3.2032, + "step": 6169 + }, + { + "epoch": 0.99, + "learning_rate": 6.98326202335231e-08, + "loss": 3.0515, + "step": 6170 + }, + { + "epoch": 1.0, + "learning_rate": 6.553637124523393e-08, + "loss": 3.0979, + "step": 6171 + }, + { + "epoch": 1.0, + "learning_rate": 6.137649938758871e-08, + "loss": 3.2015, + "step": 6172 + }, + { + "epoch": 1.0, + "learning_rate": 5.73530057952909e-08, + "loss": 3.0306, + "step": 6173 + }, + { + "epoch": 1.0, + "learning_rate": 5.3465891565962486e-08, + "loss": 3.1208, + "step": 6174 + }, + { + "epoch": 1.0, + "learning_rate": 4.971515775992197e-08, + "loss": 3.0952, + "step": 6175 + }, + { + "epoch": 1.0, + "learning_rate": 4.610080540035089e-08, + "loss": 3.1886, + "step": 6176 + }, + { + "epoch": 1.0, + "learning_rate": 4.262283547323831e-08, + "loss": 3.1066, + "step": 6177 + }, + { + "epoch": 1.0, + "learning_rate": 3.928124892732532e-08, + "loss": 3.1035, + "step": 6178 + }, + { + "epoch": 1.0, + "learning_rate": 3.607604667416054e-08, + "loss": 2.9709, + "step": 6179 + }, + { + "epoch": 1.0, + "learning_rate": 3.300722958810009e-08, + "loss": 3.1927, + "step": 6180 + }, + { + "epoch": 1.0, + "learning_rate": 3.007479850625217e-08, + "loss": 3.0113, + "step": 6181 + }, + { + "epoch": 1.0, + "learning_rate": 2.7278754228587944e-08, + "loss": 3.1605, + "step": 6182 + }, + { + "epoch": 1.0, + "learning_rate": 2.4619097517830646e-08, + "loss": 3.1529, + "step": 6183 + }, + { + "epoch": 1.0, + "learning_rate": 2.209582909945551e-08, + "loss": 3.0735, + "step": 6184 + }, + { + "epoch": 1.0, + "learning_rate": 1.9708949661911834e-08, + "loss": 3.174, + "step": 6185 + }, + { + "epoch": 1.0, + "learning_rate": 1.745845985617889e-08, + "loss": 3.1061, + "step": 6186 + }, + { + "epoch": 1.0, + "learning_rate": 1.5344360296265513e-08, + "loss": 3.1935, + "step": 6187 + }, + { + "epoch": 1.0, + "learning_rate": 1.3366651558877063e-08, + "loss": 3.1264, + "step": 6188 + }, + { + "epoch": 1.0, + "learning_rate": 1.1525334183415392e-08, + "loss": 3.1197, + "step": 6189 + }, + { + "epoch": 1.0, + "learning_rate": 9.82040867225642e-09, + "loss": 3.0783, + "step": 6190 + }, + { + "epoch": 1.0, + "learning_rate": 8.251875490472572e-09, + "loss": 3.1003, + "step": 6191 + }, + { + "epoch": 1.0, + "learning_rate": 6.8197350659437995e-09, + "loss": 3.2202, + "step": 6192 + }, + { + "epoch": 1.0, + "learning_rate": 5.523987789302076e-09, + "loss": 2.8782, + "step": 6193 + }, + { + "epoch": 1.0, + "learning_rate": 4.364634014097923e-09, + "loss": 3.1175, + "step": 6194 + }, + { + "epoch": 1.0, + "learning_rate": 3.3416740565228586e-09, + "loss": 3.1302, + "step": 6195 + }, + { + "epoch": 1.0, + "learning_rate": 2.4551081956314392e-09, + "loss": 3.117, + "step": 6196 + }, + { + "epoch": 1.0, + "learning_rate": 1.7049366732857508e-09, + "loss": 3.1732, + "step": 6197 + }, + { + "epoch": 1.0, + "learning_rate": 1.091159694155408e-09, + "loss": 3.056, + "step": 6198 + }, + { + "epoch": 1.0, + "learning_rate": 6.137774256065321e-10, + "loss": 3.0067, + "step": 6199 + }, + { + "epoch": 1.0, + "learning_rate": 2.727899979793058e-10, + "loss": 3.1415, + "step": 6200 + }, + { + "epoch": 1.0, + "learning_rate": 6.819750414388537e-11, + "loss": 2.839, + "step": 6201 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 3.2192, + "step": 6202 + }, + { + "epoch": 1.0, + "step": 6202, + "total_flos": 1.977746612331479e+17, + "train_loss": 3.291977916983242, + "train_runtime": 15293.3269, + "train_samples_per_second": 38.93, + "train_steps_per_second": 0.406 + } + ], + "logging_steps": 1.0, + "max_steps": 6202, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 2000, + "total_flos": 1.977746612331479e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}