|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998934772573944, |
|
"eval_steps": 500, |
|
"global_step": 1760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 5.2482, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 5.0467, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.8867924528301888e-05, |
|
"loss": 4.9564, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.8867924528301888e-05, |
|
"loss": 5.1658, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.8867924528301888e-05, |
|
"loss": 4.9956, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.7735849056603776e-05, |
|
"loss": 4.9814, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.660377358490566e-05, |
|
"loss": 4.2625, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.547169811320755e-05, |
|
"loss": 3.9414, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.433962264150944e-05, |
|
"loss": 3.7966, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011320754716981132, |
|
"loss": 3.6802, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001320754716981132, |
|
"loss": 3.6004, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001509433962264151, |
|
"loss": 3.5606, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016981132075471697, |
|
"loss": 3.5021, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018867924528301889, |
|
"loss": 3.5367, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00020754716981132078, |
|
"loss": 3.4328, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00022641509433962264, |
|
"loss": 3.4319, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00024528301886792453, |
|
"loss": 3.4415, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002641509433962264, |
|
"loss": 3.4164, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002830188679245283, |
|
"loss": 3.5059, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003018867924528302, |
|
"loss": 3.3674, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00032075471698113204, |
|
"loss": 3.4163, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00033962264150943393, |
|
"loss": 3.3629, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003584905660377358, |
|
"loss": 3.4262, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00037735849056603777, |
|
"loss": 3.4067, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00039622641509433966, |
|
"loss": 3.2488, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00041509433962264155, |
|
"loss": 3.4274, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00043396226415094345, |
|
"loss": 3.2567, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004528301886792453, |
|
"loss": 3.2996, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004716981132075472, |
|
"loss": 3.2675, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004905660377358491, |
|
"loss": 3.166, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000509433962264151, |
|
"loss": 3.1844, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005283018867924528, |
|
"loss": 3.1868, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005471698113207547, |
|
"loss": 3.2863, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005660377358490566, |
|
"loss": 3.1815, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005849056603773585, |
|
"loss": 3.2916, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006037735849056604, |
|
"loss": 3.1167, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006226415094339623, |
|
"loss": 3.1742, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006415094339622641, |
|
"loss": 3.1693, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000660377358490566, |
|
"loss": 3.1958, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006792452830188679, |
|
"loss": 3.1499, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006981132075471698, |
|
"loss": 3.1234, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007169811320754717, |
|
"loss": 3.0571, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007358490566037735, |
|
"loss": 3.0833, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007547169811320755, |
|
"loss": 3.1011, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007735849056603774, |
|
"loss": 3.0498, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007924528301886793, |
|
"loss": 3.1238, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0008113207547169812, |
|
"loss": 3.061, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0008301886792452831, |
|
"loss": 3.0054, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000849056603773585, |
|
"loss": 3.0881, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0008679245283018869, |
|
"loss": 3.1222, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0008867924528301887, |
|
"loss": 3.1099, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009056603773584906, |
|
"loss": 2.9995, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009245283018867925, |
|
"loss": 3.0292, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009433962264150943, |
|
"loss": 3.1197, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009622641509433962, |
|
"loss": 3.0159, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009811320754716981, |
|
"loss": 2.93, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.001, |
|
"loss": 2.9495, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009999991532161057, |
|
"loss": 2.9419, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009999966128672907, |
|
"loss": 3.027, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009999923789621597, |
|
"loss": 3.0178, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009999864515150534, |
|
"loss": 2.9705, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009999788305460491, |
|
"loss": 2.927, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009999695160809597, |
|
"loss": 3.0041, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009999585081513348, |
|
"loss": 3.0589, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009999458067944597, |
|
"loss": 2.9795, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009999314120533555, |
|
"loss": 2.8439, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009999153239767793, |
|
"loss": 3.0651, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009998975426192237, |
|
"loss": 3.0304, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000999878068040916, |
|
"loss": 2.9077, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00099985690030782, |
|
"loss": 2.9534, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009998340394916333, |
|
"loss": 2.9447, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009998094856697884, |
|
"loss": 2.8214, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009997832389254527, |
|
"loss": 3.0377, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009997552993475274, |
|
"loss": 3.0556, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009997256670306479, |
|
"loss": 2.9563, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009996943420751824, |
|
"loss": 2.94, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000999661324587233, |
|
"loss": 2.9925, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009996266146786344, |
|
"loss": 2.9317, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009995902124669538, |
|
"loss": 3.021, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009995521180754905, |
|
"loss": 2.9763, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000999512331633275, |
|
"loss": 2.9475, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009994708532750699, |
|
"loss": 2.9488, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009994276831413675, |
|
"loss": 2.7804, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009993828213783913, |
|
"loss": 2.93, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009993362681380942, |
|
"loss": 2.8958, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000999288023578158, |
|
"loss": 2.9788, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009992380878619937, |
|
"loss": 2.9375, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009991864611587405, |
|
"loss": 2.9048, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009991331436432647, |
|
"loss": 2.8672, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009990781354961605, |
|
"loss": 2.8791, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009990214369037474, |
|
"loss": 2.8839, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009989630480580713, |
|
"loss": 2.988, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009989029691569037, |
|
"loss": 2.8999, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000998841200403739, |
|
"loss": 2.9038, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009987777420077974, |
|
"loss": 2.9074, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009987125941840205, |
|
"loss": 2.8319, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009986457571530727, |
|
"loss": 2.9062, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009985772311413403, |
|
"loss": 2.8477, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009985070163809305, |
|
"loss": 2.8691, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009984351131096696, |
|
"loss": 2.9212, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009983615215711041, |
|
"loss": 2.9209, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009982862420144984, |
|
"loss": 2.9573, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009982092746948347, |
|
"loss": 2.8668, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009981306198728116, |
|
"loss": 2.8374, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009980502778148437, |
|
"loss": 2.8702, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009979682487930605, |
|
"loss": 2.799, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009978845330853054, |
|
"loss": 2.8704, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009977991309751346, |
|
"loss": 2.8714, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997712042751817, |
|
"loss": 2.8934, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009976232687103322, |
|
"loss": 2.8024, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009975328091513696, |
|
"loss": 2.8448, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997440664381328, |
|
"loss": 2.8474, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009973468347123148, |
|
"loss": 2.8874, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997251320462143, |
|
"loss": 2.8774, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000997154121954333, |
|
"loss": 2.8746, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009970552395181089, |
|
"loss": 2.9059, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000996954673488399, |
|
"loss": 2.8659, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009968524242058342, |
|
"loss": 2.8242, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009967484920167466, |
|
"loss": 2.7469, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009966428772731689, |
|
"loss": 2.9072, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000996535580332832, |
|
"loss": 2.8385, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009964266015591655, |
|
"loss": 2.8843, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009963159413212952, |
|
"loss": 2.8877, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009962035999940425, |
|
"loss": 2.8456, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009960895779579228, |
|
"loss": 2.8527, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009959738755991436, |
|
"loss": 2.8255, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000995856493309605, |
|
"loss": 2.8471, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009957374314868966, |
|
"loss": 2.847, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000995616690534297, |
|
"loss": 2.7666, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000995494270860772, |
|
"loss": 2.855, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009953701728809736, |
|
"loss": 2.871, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000995244397015239, |
|
"loss": 2.7651, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009951169436895875, |
|
"loss": 2.8782, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000994987813335721, |
|
"loss": 2.7868, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009948570063910214, |
|
"loss": 2.821, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00099472452329855, |
|
"loss": 2.7883, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009945903645070446, |
|
"loss": 2.8112, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009944545304709192, |
|
"loss": 2.8404, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000994317021650262, |
|
"loss": 2.8906, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009941778385108347, |
|
"loss": 2.8831, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009940369815240688, |
|
"loss": 2.7353, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000993894451167066, |
|
"loss": 2.8234, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009937502479225964, |
|
"loss": 2.7929, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009936043722790955, |
|
"loss": 2.7202, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009934568247306642, |
|
"loss": 2.8444, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009933076057770658, |
|
"loss": 2.8307, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000993156715923725, |
|
"loss": 2.8203, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009930041556817269, |
|
"loss": 2.7556, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000992849925567813, |
|
"loss": 2.8758, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000992694026104382, |
|
"loss": 2.7549, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009925364578194861, |
|
"loss": 2.8526, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000992377221246831, |
|
"loss": 2.781, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000992216316925772, |
|
"loss": 2.8254, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009920537454013144, |
|
"loss": 2.7487, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009918895072241096, |
|
"loss": 2.7887, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009917236029504547, |
|
"loss": 2.8059, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00099155603314229, |
|
"loss": 2.8325, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000991386798367197, |
|
"loss": 2.8345, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000991215899198397, |
|
"loss": 2.8227, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009910433362147488, |
|
"loss": 2.8391, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009908691100007463, |
|
"loss": 2.8131, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009906932211465173, |
|
"loss": 2.7959, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009905156702478214, |
|
"loss": 2.8631, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009903364579060476, |
|
"loss": 2.7267, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009901555847282122, |
|
"loss": 2.7919, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009899730513269573, |
|
"loss": 2.711, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009897888583205482, |
|
"loss": 2.8072, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009896030063328717, |
|
"loss": 2.7619, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009894154959934336, |
|
"loss": 2.7956, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009892263279373567, |
|
"loss": 2.7723, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009890355028053792, |
|
"loss": 2.7653, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009888430212438514, |
|
"loss": 2.7335, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009886488839047347, |
|
"loss": 2.7963, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009884530914455983, |
|
"loss": 2.8076, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000988255644529618, |
|
"loss": 2.7262, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009880565438255733, |
|
"loss": 2.8753, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009878557900078449, |
|
"loss": 2.7464, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009876533837564138, |
|
"loss": 2.7399, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009874493257568569, |
|
"loss": 2.7399, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009872436167003467, |
|
"loss": 2.7471, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009870362572836472, |
|
"loss": 2.7681, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009868272482091134, |
|
"loss": 2.8374, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000986616590184687, |
|
"loss": 2.7107, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009864042839238954, |
|
"loss": 2.8556, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009861903301458487, |
|
"loss": 2.7352, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009859747295752372, |
|
"loss": 2.8007, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009857574829423297, |
|
"loss": 2.7395, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009855385909829696, |
|
"loss": 2.8226, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009853180544385737, |
|
"loss": 2.7514, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000985095874056129, |
|
"loss": 2.7679, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009848720505881909, |
|
"loss": 2.805, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00098464658479288, |
|
"loss": 2.7142, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009844194774338792, |
|
"loss": 2.7042, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009841907292804316, |
|
"loss": 2.7734, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009839603411073389, |
|
"loss": 2.7753, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009837283136949566, |
|
"loss": 2.7156, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009834946478291932, |
|
"loss": 2.6806, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009832593443015066, |
|
"loss": 2.7494, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000983022403908902, |
|
"loss": 2.6956, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000982783827453928, |
|
"loss": 2.7452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009825436157446762, |
|
"loss": 2.6981, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009823017695947756, |
|
"loss": 2.8186, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009820582898233921, |
|
"loss": 2.6943, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009818131772552248, |
|
"loss": 2.7198, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009815664327205032, |
|
"loss": 2.7194, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009813180570549841, |
|
"loss": 2.834, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009810680510999503, |
|
"loss": 2.7756, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009808164157022053, |
|
"loss": 2.7234, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009805631517140723, |
|
"loss": 2.7346, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000980308259993391, |
|
"loss": 2.6711, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009800517414035141, |
|
"loss": 2.7579, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000979793596813305, |
|
"loss": 2.7716, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000979533827097134, |
|
"loss": 2.7547, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009792724331348767, |
|
"loss": 2.7107, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00097900941581191, |
|
"loss": 2.7257, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009787447760191091, |
|
"loss": 2.7289, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009784785146528447, |
|
"loss": 2.6947, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009782106326149801, |
|
"loss": 2.769, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009779411308128685, |
|
"loss": 2.7123, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009776700101593488, |
|
"loss": 2.7408, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009773972715727433, |
|
"loss": 2.7125, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009771229159768547, |
|
"loss": 2.6776, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009768469443009625, |
|
"loss": 2.7475, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009765693574798203, |
|
"loss": 2.7532, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009762901564536521, |
|
"loss": 2.6687, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00097600934216815, |
|
"loss": 2.783, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009757269155744696, |
|
"loss": 2.7581, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009754428776292286, |
|
"loss": 2.7305, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009751572292945015, |
|
"loss": 2.7487, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009748699715378182, |
|
"loss": 2.7499, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009745811053321596, |
|
"loss": 2.6904, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009742906316559548, |
|
"loss": 2.7082, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009739985514930775, |
|
"loss": 2.7261, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009737048658328426, |
|
"loss": 2.6158, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009734095756700037, |
|
"loss": 2.6389, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009731126820047481, |
|
"loss": 2.6846, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009728141858426952, |
|
"loss": 2.7424, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000972514088194892, |
|
"loss": 2.7509, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009722123900778096, |
|
"loss": 2.722, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009719090925133407, |
|
"loss": 2.6854, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009716041965287953, |
|
"loss": 2.7199, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009712977031568973, |
|
"loss": 2.6969, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009709896134357815, |
|
"loss": 2.7208, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009706799284089891, |
|
"loss": 2.647, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009703686491254658, |
|
"loss": 2.6342, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009700557766395565, |
|
"loss": 2.7401, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009697413120110028, |
|
"loss": 2.701, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000969425256304939, |
|
"loss": 2.6749, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009691076105918884, |
|
"loss": 2.7221, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009687883759477605, |
|
"loss": 2.7285, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009684675534538461, |
|
"loss": 2.704, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009681451441968143, |
|
"loss": 2.7219, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009678211492687095, |
|
"loss": 2.6817, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009674955697669458, |
|
"loss": 2.6934, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009671684067943056, |
|
"loss": 2.6851, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009668396614589341, |
|
"loss": 2.6716, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009665093348743361, |
|
"loss": 2.7255, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009661774281593729, |
|
"loss": 2.659, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009658439424382575, |
|
"loss": 2.6577, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009655088788405508, |
|
"loss": 2.7046, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009651722385011592, |
|
"loss": 2.6396, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009648340225603287, |
|
"loss": 2.7347, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000964494232163643, |
|
"loss": 2.6952, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009641528684620179, |
|
"loss": 2.7599, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009638099326116987, |
|
"loss": 2.7123, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009634654257742554, |
|
"loss": 2.6239, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009631193491165797, |
|
"loss": 2.6331, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009627717038108799, |
|
"loss": 2.6387, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000962422491034678, |
|
"loss": 2.7134, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009620717119708047, |
|
"loss": 2.6042, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009617193678073965, |
|
"loss": 2.7081, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009613654597378909, |
|
"loss": 2.6387, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009610099889610223, |
|
"loss": 2.6194, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009606529566808186, |
|
"loss": 2.6575, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009602943641065964, |
|
"loss": 2.6457, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009599342124529575, |
|
"loss": 2.5716, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009595725029397841, |
|
"loss": 2.6692, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009592092367922358, |
|
"loss": 2.6708, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009588444152407441, |
|
"loss": 2.686, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009584780395210088, |
|
"loss": 2.5811, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009581101108739944, |
|
"loss": 2.5678, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000957740630545925, |
|
"loss": 2.6341, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009573695997882808, |
|
"loss": 2.6191, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000956997019857793, |
|
"loss": 2.7044, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009566228920164405, |
|
"loss": 2.6047, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009562472175314449, |
|
"loss": 2.6376, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009558699976752668, |
|
"loss": 2.686, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009554912337256007, |
|
"loss": 2.6306, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009551109269653718, |
|
"loss": 2.6262, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009547290786827303, |
|
"loss": 2.6516, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009543456901710483, |
|
"loss": 2.6135, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009539607627289146, |
|
"loss": 2.7414, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009535742976601309, |
|
"loss": 2.6305, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009531862962737065, |
|
"loss": 2.6181, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009527967598838547, |
|
"loss": 2.6371, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009524056898099881, |
|
"loss": 2.6824, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009520130873767141, |
|
"loss": 2.5707, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009516189539138305, |
|
"loss": 2.5983, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009512232907563206, |
|
"loss": 2.6824, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009508260992443492, |
|
"loss": 2.6618, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009504273807232576, |
|
"loss": 2.6094, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009500271365435599, |
|
"loss": 2.5858, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000949625368060937, |
|
"loss": 2.6308, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009492220766362336, |
|
"loss": 2.5783, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009488172636354521, |
|
"loss": 2.6703, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009484109304297492, |
|
"loss": 2.6737, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009480030783954306, |
|
"loss": 2.5912, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009475937089139463, |
|
"loss": 2.5694, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009471828233718863, |
|
"loss": 2.6827, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009467704231609755, |
|
"loss": 2.6466, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009463565096780695, |
|
"loss": 2.6016, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009459410843251495, |
|
"loss": 2.5613, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009455241485093171, |
|
"loss": 2.5848, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009451057036427909, |
|
"loss": 2.6192, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009446857511428998, |
|
"loss": 2.6521, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009442642924320806, |
|
"loss": 2.6224, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009438413289378705, |
|
"loss": 2.5611, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009434168620929044, |
|
"loss": 2.602, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009429908933349091, |
|
"loss": 2.5021, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009425634241066985, |
|
"loss": 2.5542, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009421344558561687, |
|
"loss": 2.5592, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009417039900362936, |
|
"loss": 2.6289, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000941272028105119, |
|
"loss": 2.5938, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009408385715257588, |
|
"loss": 2.5195, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000940403621766389, |
|
"loss": 2.6641, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009399671803002434, |
|
"loss": 2.5733, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009395292486056086, |
|
"loss": 2.5534, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009390898281658184, |
|
"loss": 2.6198, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009386489204692497, |
|
"loss": 2.6807, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009382065270093163, |
|
"loss": 2.5644, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009377626492844649, |
|
"loss": 2.6815, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009373172887981699, |
|
"loss": 2.5921, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009368704470589271, |
|
"loss": 2.5429, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009364221255802504, |
|
"loss": 2.5444, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009359723258806654, |
|
"loss": 2.5487, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009355210494837045, |
|
"loss": 2.5282, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009350682979179024, |
|
"loss": 2.6438, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009346140727167896, |
|
"loss": 2.5692, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009341583754188887, |
|
"loss": 2.5261, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000933701207567708, |
|
"loss": 2.6031, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009332425707117373, |
|
"loss": 2.5488, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009327824664044417, |
|
"loss": 2.6051, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009323208962042569, |
|
"loss": 2.6468, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009318578616745834, |
|
"loss": 2.4675, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009313933643837825, |
|
"loss": 2.6519, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009309274059051691, |
|
"loss": 2.5038, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009304599878170078, |
|
"loss": 2.6157, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009299911117025071, |
|
"loss": 2.6254, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000929520779149814, |
|
"loss": 2.5462, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009290489917520087, |
|
"loss": 2.5864, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009285757511070987, |
|
"loss": 2.5915, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009281010588180146, |
|
"loss": 2.5718, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009276249164926034, |
|
"loss": 2.6059, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009271473257436238, |
|
"loss": 2.5459, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009266682881887403, |
|
"loss": 2.5749, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009261878054505181, |
|
"loss": 2.5975, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009257058791564173, |
|
"loss": 2.5799, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009252225109387879, |
|
"loss": 2.5351, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009247377024348631, |
|
"loss": 2.6105, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009242514552867555, |
|
"loss": 2.6401, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009237637711414497, |
|
"loss": 2.5392, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009232746516507984, |
|
"loss": 2.589, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009227840984715153, |
|
"loss": 2.5614, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009222921132651708, |
|
"loss": 2.5614, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009217986976981854, |
|
"loss": 2.5427, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009213038534418243, |
|
"loss": 2.5695, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009208075821721926, |
|
"loss": 2.5442, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009203098855702277, |
|
"loss": 2.6164, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009198107653216961, |
|
"loss": 2.5373, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009193102231171852, |
|
"loss": 2.5858, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009188082606520997, |
|
"loss": 2.5471, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009183048796266546, |
|
"loss": 2.5069, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009178000817458694, |
|
"loss": 2.5816, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009172938687195629, |
|
"loss": 2.4701, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009167862422623474, |
|
"loss": 2.6111, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009162772040936227, |
|
"loss": 2.6355, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009157667559375697, |
|
"loss": 2.5845, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000915254899523146, |
|
"loss": 2.6657, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009147416365840783, |
|
"loss": 2.5875, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009142269688588578, |
|
"loss": 2.5423, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000913710898090734, |
|
"loss": 2.5814, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009131934260277084, |
|
"loss": 2.5892, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009126745544225292, |
|
"loss": 2.6292, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009121542850326848, |
|
"loss": 2.5795, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009116326196203981, |
|
"loss": 2.5144, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009111095599526206, |
|
"loss": 2.5801, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009105851078010266, |
|
"loss": 2.5736, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009100592649420061, |
|
"loss": 2.56, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009095320331566606, |
|
"loss": 2.5927, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009090034142307954, |
|
"loss": 2.5959, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009084734099549146, |
|
"loss": 2.5392, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009079420221242145, |
|
"loss": 2.6631, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009074092525385777, |
|
"loss": 2.5413, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000906875103002567, |
|
"loss": 2.5866, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009063395753254193, |
|
"loss": 2.5817, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009058026713210397, |
|
"loss": 2.6183, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009052643928079945, |
|
"loss": 2.5762, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009047247416095059, |
|
"loss": 2.5657, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009041837195534462, |
|
"loss": 2.4247, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009036413284723301, |
|
"loss": 2.6515, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009030975702033098, |
|
"loss": 2.5671, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009025524465881683, |
|
"loss": 2.5696, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000902005959473313, |
|
"loss": 2.5915, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009014581107097702, |
|
"loss": 2.5173, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009009089021531776, |
|
"loss": 2.5242, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009003583356637793, |
|
"loss": 2.5752, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008998064131064185, |
|
"loss": 2.4775, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008992531363505318, |
|
"loss": 2.5739, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008986985072701425, |
|
"loss": 2.4938, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008981425277438546, |
|
"loss": 2.4801, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008975851996548459, |
|
"loss": 2.4974, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008970265248908626, |
|
"loss": 2.5897, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0008964665053442116, |
|
"loss": 2.5122, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000895905142911755, |
|
"loss": 2.5803, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008953424394949035, |
|
"loss": 2.6401, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00089477839699961, |
|
"loss": 2.561, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008942130173363627, |
|
"loss": 2.6039, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008936463024201794, |
|
"loss": 2.5148, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008930782541706002, |
|
"loss": 2.5421, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008925088745116816, |
|
"loss": 2.4854, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008919381653719895, |
|
"loss": 2.5535, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008913661286845936, |
|
"loss": 2.576, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008907927663870592, |
|
"loss": 2.6122, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008902180804214423, |
|
"loss": 2.6166, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008896420727342823, |
|
"loss": 2.5119, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008890647452765953, |
|
"loss": 2.5047, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008884861000038676, |
|
"loss": 2.5731, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008879061388760492, |
|
"loss": 2.576, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008873248638575471, |
|
"loss": 2.5827, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008867422769172187, |
|
"loss": 2.5291, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0008861583800283646, |
|
"loss": 2.4942, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008855731751687232, |
|
"loss": 2.5566, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008849866643204625, |
|
"loss": 2.5337, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000884398849470174, |
|
"loss": 2.5165, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008838097326088666, |
|
"loss": 2.6361, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000883219315731959, |
|
"loss": 2.5504, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008826276008392731, |
|
"loss": 2.5573, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008820345899350273, |
|
"loss": 2.562, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008814402850278303, |
|
"loss": 2.4775, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008808446881306731, |
|
"loss": 2.5607, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008802478012609234, |
|
"loss": 2.5578, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008796496264403175, |
|
"loss": 2.5214, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008790501656949552, |
|
"loss": 2.519, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008784494210552909, |
|
"loss": 2.5888, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008778473945561283, |
|
"loss": 2.487, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008772440882366126, |
|
"loss": 2.5737, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008766395041402244, |
|
"loss": 2.5556, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0008760336443147718, |
|
"loss": 2.5803, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008754265108123843, |
|
"loss": 2.5664, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008748181056895051, |
|
"loss": 2.5422, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008742084310068851, |
|
"loss": 2.5137, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008735974888295753, |
|
"loss": 2.4922, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008729852812269191, |
|
"loss": 2.5818, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008723718102725471, |
|
"loss": 2.5574, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008717570780443685, |
|
"loss": 2.582, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008711410866245647, |
|
"loss": 2.4976, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008705238380995821, |
|
"loss": 2.5415, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008699053345601252, |
|
"loss": 2.548, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008692855781011493, |
|
"loss": 2.4905, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008686645708218535, |
|
"loss": 2.5447, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008680423148256737, |
|
"loss": 2.5469, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008674188122202755, |
|
"loss": 2.5667, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008667940651175465, |
|
"loss": 2.5645, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00086616807563359, |
|
"loss": 2.4549, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008655408458887171, |
|
"loss": 2.4675, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0008649123780074401, |
|
"loss": 2.4899, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000864282674118465, |
|
"loss": 2.4976, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008636517363546838, |
|
"loss": 2.5422, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008630195668531689, |
|
"loss": 2.5581, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008623861677551637, |
|
"loss": 2.5215, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000861751541206077, |
|
"loss": 2.583, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008611156893554748, |
|
"loss": 2.5575, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008604786143570735, |
|
"loss": 2.4754, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008598403183687328, |
|
"loss": 2.5841, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008592008035524474, |
|
"loss": 2.5482, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008585600720743409, |
|
"loss": 2.4925, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008579181261046576, |
|
"loss": 2.5339, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008572749678177555, |
|
"loss": 2.5182, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000856630599392099, |
|
"loss": 2.4739, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008559850230102512, |
|
"loss": 2.5473, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000855338240858867, |
|
"loss": 2.5086, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008546902551286853, |
|
"loss": 2.5512, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008540410680145211, |
|
"loss": 2.4758, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0008533906817152599, |
|
"loss": 2.5077, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008527390984338477, |
|
"loss": 2.5646, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008520863203772857, |
|
"loss": 2.555, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008514323497566216, |
|
"loss": 2.4723, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008507771887869426, |
|
"loss": 2.5635, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008501208396873676, |
|
"loss": 2.503, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008494633046810402, |
|
"loss": 2.4948, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008488045859951206, |
|
"loss": 2.5435, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000848144685860778, |
|
"loss": 2.5013, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008474836065131838, |
|
"loss": 2.5691, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008468213501915034, |
|
"loss": 2.5194, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008461579191388889, |
|
"loss": 2.5856, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008454933156024709, |
|
"loss": 2.6139, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008448275418333518, |
|
"loss": 2.4525, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008441606000865978, |
|
"loss": 2.5658, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008434924926212307, |
|
"loss": 2.4961, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008428232217002214, |
|
"loss": 2.546, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0008421527895904811, |
|
"loss": 2.496, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008414811985628544, |
|
"loss": 2.4998, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008408084508921106, |
|
"loss": 2.4938, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000840134548856938, |
|
"loss": 2.5078, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008394594947399337, |
|
"loss": 2.5869, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008387832908275977, |
|
"loss": 2.5255, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008381059394103243, |
|
"loss": 2.5458, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008374274427823946, |
|
"loss": 2.5222, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008367478032419686, |
|
"loss": 2.5068, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008360670230910777, |
|
"loss": 2.511, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008353851046356163, |
|
"loss": 2.4518, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008347020501853348, |
|
"loss": 2.404, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008340178620538315, |
|
"loss": 2.5601, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008333325425585439, |
|
"loss": 2.4297, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008326460940207423, |
|
"loss": 2.5419, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000831958518765521, |
|
"loss": 2.5175, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008312698191217906, |
|
"loss": 2.5704, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008305799974222701, |
|
"loss": 2.5524, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0008298890560034791, |
|
"loss": 2.5907, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008291969972057298, |
|
"loss": 2.5519, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008285038233731193, |
|
"loss": 2.3889, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008278095368535215, |
|
"loss": 2.4414, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008271141399985787, |
|
"loss": 2.4874, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008264176351636943, |
|
"loss": 2.4667, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008257200247080247, |
|
"loss": 2.4855, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008250213109944712, |
|
"loss": 2.4921, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008243214963896718, |
|
"loss": 2.4261, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008236205832639935, |
|
"loss": 2.4202, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000822918573991524, |
|
"loss": 2.5184, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008222154709500637, |
|
"loss": 2.5094, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008215112765211184, |
|
"loss": 2.4613, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008208059930898898, |
|
"loss": 2.4444, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008200996230452684, |
|
"loss": 2.5802, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008193921687798256, |
|
"loss": 2.5125, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008186836326898047, |
|
"loss": 2.4846, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0008179740171751135, |
|
"loss": 2.4259, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000817263324639316, |
|
"loss": 2.4611, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008165515574896243, |
|
"loss": 2.5396, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008158387181368901, |
|
"loss": 2.5483, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008151248089955969, |
|
"loss": 2.4982, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008144098324838518, |
|
"loss": 2.4895, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008136937910233772, |
|
"loss": 2.4413, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008129766870395026, |
|
"loss": 2.5752, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008122585229611565, |
|
"loss": 2.4546, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008115393012208578, |
|
"loss": 2.475, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008108190242547082, |
|
"loss": 2.4423, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008100976945023834, |
|
"loss": 2.5039, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000809375314407125, |
|
"loss": 2.4312, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008086518864157324, |
|
"loss": 2.4996, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008079274129785543, |
|
"loss": 2.5001, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008072018965494804, |
|
"loss": 2.5265, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008064753395859332, |
|
"loss": 2.4214, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0008057477445488597, |
|
"loss": 2.4731, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000805019113902723, |
|
"loss": 2.4975, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0008042894501154937, |
|
"loss": 2.4966, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0008035587556586421, |
|
"loss": 2.4733, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0008028270330071292, |
|
"loss": 2.5611, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0008020942846393992, |
|
"loss": 2.4482, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0008013605130373697, |
|
"loss": 2.4835, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0008006257206864252, |
|
"loss": 2.5224, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007998899100754064, |
|
"loss": 2.5073, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007991530836966039, |
|
"loss": 2.3737, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007984152440457485, |
|
"loss": 2.5418, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000797676393622003, |
|
"loss": 2.4813, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007969365349279544, |
|
"loss": 2.515, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007961956704696039, |
|
"loss": 2.5198, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007954538027563601, |
|
"loss": 2.5001, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007947109343010295, |
|
"loss": 2.4378, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007939670676198083, |
|
"loss": 2.4959, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007932222052322737, |
|
"loss": 2.493, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0007924763496613756, |
|
"loss": 2.4409, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000791729503433428, |
|
"loss": 2.5195, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007909816690781004, |
|
"loss": 2.5452, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000790232849128409, |
|
"loss": 2.49, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007894830461207085, |
|
"loss": 2.5596, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007887322625946835, |
|
"loss": 2.503, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007879805010933396, |
|
"loss": 2.5275, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007872277641629948, |
|
"loss": 2.4316, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007864740543532711, |
|
"loss": 2.4647, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007857193742170859, |
|
"loss": 2.4628, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007849637263106431, |
|
"loss": 2.4359, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007842071131934246, |
|
"loss": 2.3986, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007834495374281816, |
|
"loss": 2.459, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007826910015809261, |
|
"loss": 2.4605, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007819315082209217, |
|
"loss": 2.4607, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007811710599206752, |
|
"loss": 2.4803, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007804096592559284, |
|
"loss": 2.5248, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007796473088056487, |
|
"loss": 2.4919, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0007788840111520197, |
|
"loss": 2.5144, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007781197688804349, |
|
"loss": 2.513, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000777354584579486, |
|
"loss": 2.4596, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007765884608409561, |
|
"loss": 2.4682, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007758214002598103, |
|
"loss": 2.3991, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007750534054341866, |
|
"loss": 2.4929, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007742844789653875, |
|
"loss": 2.4513, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007735146234578716, |
|
"loss": 2.49, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007727438415192434, |
|
"loss": 2.4734, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007719721357602461, |
|
"loss": 2.4681, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007711995087947517, |
|
"loss": 2.4802, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007704259632397525, |
|
"loss": 2.5405, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007696515017153522, |
|
"loss": 2.5218, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007688761268447569, |
|
"loss": 2.52, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007680998412542664, |
|
"loss": 2.5314, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007673226475732652, |
|
"loss": 2.3808, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007665445484342137, |
|
"loss": 2.4448, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007657655464726394, |
|
"loss": 2.4814, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0007649856443271273, |
|
"loss": 2.4774, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007642048446393117, |
|
"loss": 2.4356, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007634231500538671, |
|
"loss": 2.4701, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007626405632184992, |
|
"loss": 2.4459, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007618570867839354, |
|
"loss": 2.5487, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007610727234039167, |
|
"loss": 2.4338, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007602874757351883, |
|
"loss": 2.4479, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007595013464374904, |
|
"loss": 2.4928, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007587143381735498, |
|
"loss": 2.423, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007579264536090697, |
|
"loss": 2.4827, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007571376954127223, |
|
"loss": 2.5261, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007563480662561386, |
|
"loss": 2.47, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007555575688138993, |
|
"loss": 2.4221, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007547662057635266, |
|
"loss": 2.4059, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007539739797854746, |
|
"loss": 2.381, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007531808935631197, |
|
"loss": 2.5162, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007523869497827527, |
|
"loss": 2.4406, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007515921511335688, |
|
"loss": 2.4417, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007507965003076589, |
|
"loss": 2.4449, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00075, |
|
"loss": 2.4725, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007492026529084468, |
|
"loss": 2.4522, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000748404461733722, |
|
"loss": 2.5874, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000747605429179407, |
|
"loss": 2.5147, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007468055579519338, |
|
"loss": 2.4721, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007460048507605746, |
|
"loss": 2.4702, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007452033103174332, |
|
"loss": 2.3927, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007444009393374356, |
|
"loss": 2.4879, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007435977405383212, |
|
"loss": 2.4123, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007427937166406332, |
|
"loss": 2.4447, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007419888703677097, |
|
"loss": 2.4302, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007411832044456739, |
|
"loss": 2.4721, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007403767216034257, |
|
"loss": 2.4133, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007395694245726318, |
|
"loss": 2.3523, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007387613160877165, |
|
"loss": 2.4691, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007379523988858532, |
|
"loss": 2.447, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007371426757069537, |
|
"loss": 2.5446, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007363321492936604, |
|
"loss": 2.4753, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007355208223913362, |
|
"loss": 2.4594, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007347086977480551, |
|
"loss": 2.4352, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007338957781145936, |
|
"loss": 2.442, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007330820662444207, |
|
"loss": 2.3776, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007322675648936887, |
|
"loss": 2.3646, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007314522768212241, |
|
"loss": 2.4233, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007306362047885182, |
|
"loss": 2.4675, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007298193515597177, |
|
"loss": 2.5046, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007290017199016152, |
|
"loss": 2.548, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007281833125836397, |
|
"loss": 2.4224, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007273641323778481, |
|
"loss": 2.4324, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007265441820589145, |
|
"loss": 2.4377, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007257234644041222, |
|
"loss": 2.4007, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007249019821933529, |
|
"loss": 2.4604, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007240797382090783, |
|
"loss": 2.483, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007232567352363501, |
|
"loss": 2.4878, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0007224329760627911, |
|
"loss": 2.5135, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007216084634785854, |
|
"loss": 2.495, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007207832002764687, |
|
"loss": 2.4743, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007199571892517193, |
|
"loss": 2.4359, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007191304332021489, |
|
"loss": 2.4334, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007183029349280921, |
|
"loss": 2.4952, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007174746972323975, |
|
"loss": 2.439, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007166457229204189, |
|
"loss": 2.5289, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007158160148000044, |
|
"loss": 2.4211, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000714985575681488, |
|
"loss": 2.4422, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007141544083776795, |
|
"loss": 2.4207, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007133225157038554, |
|
"loss": 2.4256, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007124899004777489, |
|
"loss": 2.4774, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007116565655195406, |
|
"loss": 2.3843, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000710822513651849, |
|
"loss": 2.5185, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000709987747699721, |
|
"loss": 2.4379, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007091522704906218, |
|
"loss": 2.4733, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0007083160848544261, |
|
"loss": 2.4462, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007074791936234082, |
|
"loss": 2.3959, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007066415996322318, |
|
"loss": 2.4609, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007058033057179415, |
|
"loss": 2.4024, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007049643147199524, |
|
"loss": 2.5052, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007041246294800411, |
|
"loss": 2.4512, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007032842528423348, |
|
"loss": 2.4629, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007024431876533035, |
|
"loss": 2.3917, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007016014367617487, |
|
"loss": 2.4708, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007007590030187952, |
|
"loss": 2.4183, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006999158892778798, |
|
"loss": 2.4237, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006990720983947435, |
|
"loss": 2.45, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006982276332274202, |
|
"loss": 2.4202, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006973824966362281, |
|
"loss": 2.4131, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006965366914837591, |
|
"loss": 2.4219, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006956902206348702, |
|
"loss": 2.4663, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006948430869566728, |
|
"loss": 2.5585, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006939952933185234, |
|
"loss": 2.4702, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006931468425920142, |
|
"loss": 2.4307, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006922977376509629, |
|
"loss": 2.4803, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000691447981371403, |
|
"loss": 2.4817, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006905975766315739, |
|
"loss": 2.4975, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006897465263119122, |
|
"loss": 2.5327, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006888948332950405, |
|
"loss": 2.4044, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006880425004657586, |
|
"loss": 2.5197, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006871895307110332, |
|
"loss": 2.4461, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006863359269199886, |
|
"loss": 2.4614, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006854816919838967, |
|
"loss": 2.3401, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006846268287961667, |
|
"loss": 2.4542, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006837713402523364, |
|
"loss": 2.4437, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006829152292500613, |
|
"loss": 2.4147, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006820584986891058, |
|
"loss": 2.512, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006812011514713321, |
|
"loss": 2.4996, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006803431905006916, |
|
"loss": 2.4634, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006794846186832143, |
|
"loss": 2.4315, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006786254389269994, |
|
"loss": 2.4492, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006777656541422055, |
|
"loss": 2.4084, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006769052672410398, |
|
"loss": 2.4165, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006760442811377499, |
|
"loss": 2.4669, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006751826987486118, |
|
"loss": 2.3723, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006743205229919224, |
|
"loss": 2.4004, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006734577567879876, |
|
"loss": 2.4222, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006725944030591135, |
|
"loss": 2.4204, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006717304647295964, |
|
"loss": 2.4191, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006708659447257126, |
|
"loss": 2.4486, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006700008459757083, |
|
"loss": 2.3486, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006691351714097905, |
|
"loss": 2.3805, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006682689239601161, |
|
"loss": 2.402, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006674021065607829, |
|
"loss": 2.367, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006665347221478187, |
|
"loss": 2.4574, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006656667736591724, |
|
"loss": 2.4072, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006647982640347029, |
|
"loss": 2.4673, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006639291962161702, |
|
"loss": 2.4676, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0006630595731472249, |
|
"loss": 2.4421, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000662189397773398, |
|
"loss": 2.4604, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006613186730420917, |
|
"loss": 2.4199, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006604474019025686, |
|
"loss": 2.4471, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006595755873059422, |
|
"loss": 2.4323, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006587032322051667, |
|
"loss": 2.3491, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006578303395550271, |
|
"loss": 2.4445, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006569569123121294, |
|
"loss": 2.5522, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006560829534348897, |
|
"loss": 2.393, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006552084658835254, |
|
"loss": 2.4933, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006543334526200445, |
|
"loss": 2.4027, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006534579166082354, |
|
"loss": 2.4971, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006525818608136572, |
|
"loss": 2.4315, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006517052882036298, |
|
"loss": 2.4696, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006508282017472235, |
|
"loss": 2.4609, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000649950604415249, |
|
"loss": 2.4614, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0006490724991802474, |
|
"loss": 2.4132, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00064819388901648, |
|
"loss": 2.5059, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000647314776899919, |
|
"loss": 2.4084, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000646435165808236, |
|
"loss": 2.4937, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000645555058720793, |
|
"loss": 2.5148, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006446744586186322, |
|
"loss": 2.4094, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006437933684844655, |
|
"loss": 2.4952, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006429117913026646, |
|
"loss": 2.4557, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000642029730059251, |
|
"loss": 2.5387, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006411471877418855, |
|
"loss": 2.4937, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000640264167339859, |
|
"loss": 2.4658, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006393806718440806, |
|
"loss": 2.4209, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006384967042470702, |
|
"loss": 2.4134, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006376122675429452, |
|
"loss": 2.4574, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006367273647274131, |
|
"loss": 2.3942, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006358419987977595, |
|
"loss": 2.4388, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006349561727528388, |
|
"loss": 2.3744, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006340698895930638, |
|
"loss": 2.4443, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0006331831523203963, |
|
"loss": 2.4567, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000632295963938335, |
|
"loss": 2.498, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006314083274519076, |
|
"loss": 2.5225, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000630520245867659, |
|
"loss": 2.4169, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006296317221936421, |
|
"loss": 2.4793, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006287427594394069, |
|
"loss": 2.4272, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000627853360615991, |
|
"loss": 2.5425, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006269635287359086, |
|
"loss": 2.3694, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006260732668131409, |
|
"loss": 2.4553, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006251825778631258, |
|
"loss": 2.4705, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006242914649027476, |
|
"loss": 2.416, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006233999309503267, |
|
"loss": 2.3962, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006225079790256094, |
|
"loss": 2.4481, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006216156121497578, |
|
"loss": 2.4229, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006207228333453396, |
|
"loss": 2.3921, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006198296456363174, |
|
"loss": 2.4722, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006189360520480393, |
|
"loss": 2.4749, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006180420556072279, |
|
"loss": 2.482, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0006171476593419703, |
|
"loss": 2.4231, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000616252866281708, |
|
"loss": 2.4011, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006153576794572262, |
|
"loss": 2.4231, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006144621019006443, |
|
"loss": 2.478, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006135661366454045, |
|
"loss": 2.4443, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006126697867262632, |
|
"loss": 2.4758, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006117730551792786, |
|
"loss": 2.4348, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006108759450418022, |
|
"loss": 2.4317, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006099784593524678, |
|
"loss": 2.4133, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006090806011511808, |
|
"loss": 2.4218, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006081823734791089, |
|
"loss": 2.3959, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000607283779378671, |
|
"loss": 2.436, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000606384821893527, |
|
"loss": 2.4626, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006054855040685679, |
|
"loss": 2.4465, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000604585828949905, |
|
"loss": 2.4273, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00060368579958486, |
|
"loss": 2.4659, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006027854190219544, |
|
"loss": 2.3485, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006018846903108991, |
|
"loss": 2.4722, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0006009836165025845, |
|
"loss": 2.372, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0006000822006490696, |
|
"loss": 2.4584, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005991804458035724, |
|
"loss": 2.5084, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005982783550204585, |
|
"loss": 2.3997, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005973759313552318, |
|
"loss": 2.4404, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005964731778645237, |
|
"loss": 2.4222, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005955700976060826, |
|
"loss": 2.4262, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005946666936387637, |
|
"loss": 2.4388, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005937629690225188, |
|
"loss": 2.4265, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005928589268183856, |
|
"loss": 2.3676, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005919545700884778, |
|
"loss": 2.3852, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005910499018959741, |
|
"loss": 2.3914, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005901449253051084, |
|
"loss": 2.4019, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005892396433811589, |
|
"loss": 2.4193, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005883340591904385, |
|
"loss": 2.343, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005874281758002835, |
|
"loss": 2.4708, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005865219962790438, |
|
"loss": 2.4664, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005856155236960723, |
|
"loss": 2.3792, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005847087611217145, |
|
"loss": 2.4771, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005838017116272981, |
|
"loss": 2.3939, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005828943782851228, |
|
"loss": 2.4681, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005819867641684497, |
|
"loss": 2.5123, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005810788723514908, |
|
"loss": 2.478, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005801707059093988, |
|
"loss": 2.4284, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005792622679182565, |
|
"loss": 2.3805, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005783535614550666, |
|
"loss": 2.4595, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.000577444589597741, |
|
"loss": 2.4835, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005765353554250907, |
|
"loss": 2.4444, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005756258620168151, |
|
"loss": 2.3644, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005747161124534918, |
|
"loss": 2.3951, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005738061098165657, |
|
"loss": 2.3674, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005728958571883392, |
|
"loss": 2.3816, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005719853576519614, |
|
"loss": 2.4328, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005710746142914177, |
|
"loss": 2.4554, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005701636301915191, |
|
"loss": 2.4233, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005692524084378926, |
|
"loss": 2.3548, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0005683409521169697, |
|
"loss": 2.3324, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005674292643159764, |
|
"loss": 2.4403, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000566517348122923, |
|
"loss": 2.4564, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005656052066265931, |
|
"loss": 2.3844, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005646928429165339, |
|
"loss": 2.3731, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005637802600830449, |
|
"loss": 2.4234, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005628674612171679, |
|
"loss": 2.3916, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005619544494106761, |
|
"loss": 2.3402, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005610412277560646, |
|
"loss": 2.5129, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000560127799346539, |
|
"loss": 2.3983, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005592141672760049, |
|
"loss": 2.4103, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005583003346390582, |
|
"loss": 2.3486, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005573863045309738, |
|
"loss": 2.4649, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005564720800476958, |
|
"loss": 2.4571, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005555576642858263, |
|
"loss": 2.3613, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005546430603426154, |
|
"loss": 2.4394, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005537282713159507, |
|
"loss": 2.4288, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005528133003043468, |
|
"loss": 2.4392, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005518981504069345, |
|
"loss": 2.4756, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005509828247234505, |
|
"loss": 2.4553, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005500673263542272, |
|
"loss": 2.4203, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005491516584001814, |
|
"loss": 2.3956, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005482358239628047, |
|
"loss": 2.3655, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005473198261441526, |
|
"loss": 2.4502, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005464036680468339, |
|
"loss": 2.346, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005454873527740002, |
|
"loss": 2.4168, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005445708834293356, |
|
"loss": 2.4072, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005436542631170459, |
|
"loss": 2.4012, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005427374949418486, |
|
"loss": 2.4058, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005418205820089616, |
|
"loss": 2.4099, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005409035274240935, |
|
"loss": 2.4391, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005399863342934323, |
|
"loss": 2.412, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005390690057236359, |
|
"loss": 2.3586, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00053815154482182, |
|
"loss": 2.4768, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005372339546955492, |
|
"loss": 2.3788, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000536316238452826, |
|
"loss": 2.263, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005353983992020794, |
|
"loss": 2.4776, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005344804400521554, |
|
"loss": 2.4101, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005335623641123062, |
|
"loss": 2.3999, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005326441744921795, |
|
"loss": 2.4281, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.000531725874301808, |
|
"loss": 2.443, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005308074666515989, |
|
"loss": 2.3822, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005298889546523233, |
|
"loss": 2.4589, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005289703414151062, |
|
"loss": 2.4423, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.000528051630051415, |
|
"loss": 2.4234, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005271328236730496, |
|
"loss": 2.4683, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005262139253921318, |
|
"loss": 2.3599, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005252949383210948, |
|
"loss": 2.4579, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005243758655726721, |
|
"loss": 2.3679, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005234567102598881, |
|
"loss": 2.3544, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005225374754960462, |
|
"loss": 2.3712, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005216181643947191, |
|
"loss": 2.4384, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005206987800697383, |
|
"loss": 2.3886, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005197793256351832, |
|
"loss": 2.4095, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005188598042053704, |
|
"loss": 2.404, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005179402188948438, |
|
"loss": 2.4051, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005170205728183636, |
|
"loss": 2.4318, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005161008690908956, |
|
"loss": 2.3231, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005151811108276011, |
|
"loss": 2.4394, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.000514261301143826, |
|
"loss": 2.3758, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005133414431550905, |
|
"loss": 2.3559, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005124215399770782, |
|
"loss": 2.4378, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005115015947256259, |
|
"loss": 2.3401, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005105816105167129, |
|
"loss": 2.4664, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005096615904664505, |
|
"loss": 2.308, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005087415376910712, |
|
"loss": 2.316, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005078214553069186, |
|
"loss": 2.4194, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005069013464304365, |
|
"loss": 2.3844, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005059812141781584, |
|
"loss": 2.4863, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005050610616666968, |
|
"loss": 2.3552, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0005041408920127332, |
|
"loss": 2.4535, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0005032207083330071, |
|
"loss": 2.3841, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0005023005137443051, |
|
"loss": 2.4558, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0005013803113634514, |
|
"loss": 2.4689, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0005004601043072958, |
|
"loss": 2.4366, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004995398956927044, |
|
"loss": 2.3713, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004986196886365487, |
|
"loss": 2.4485, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004976994862556949, |
|
"loss": 2.41, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004967792916669929, |
|
"loss": 2.4275, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004958591079872668, |
|
"loss": 2.4909, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004949389383333032, |
|
"loss": 2.4397, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004940187858218417, |
|
"loss": 2.495, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004930986535695636, |
|
"loss": 2.3796, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004921785446930815, |
|
"loss": 2.4329, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004912584623089289, |
|
"loss": 2.3956, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004903384095335496, |
|
"loss": 2.4484, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004894183894832872, |
|
"loss": 2.3731, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004884984052743741, |
|
"loss": 2.3831, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004875784600229219, |
|
"loss": 2.4037, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00048665855684490954, |
|
"loss": 2.4096, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000485738698856174, |
|
"loss": 2.3367, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000484818889172399, |
|
"loss": 2.4187, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004838991309091045, |
|
"loss": 2.4407, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004829794271816365, |
|
"loss": 2.4333, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004820597811051563, |
|
"loss": 2.4117, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00048114019579462977, |
|
"loss": 2.4389, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00048022067436481703, |
|
"loss": 2.4473, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004793012199302619, |
|
"loss": 2.3529, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00047838183560528115, |
|
"loss": 2.4897, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004774625245039541, |
|
"loss": 2.3568, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004765432897401121, |
|
"loss": 2.4079, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00047562413442732784, |
|
"loss": 2.4042, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004747050616789052, |
|
"loss": 2.3854, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00047378607460786814, |
|
"loss": 2.3854, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00047286717632695035, |
|
"loss": 2.4135, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00047194836994858503, |
|
"loss": 2.4586, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004710296585848938, |
|
"loss": 2.4829, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004701110453476767, |
|
"loss": 2.4407, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046919253334840125, |
|
"loss": 2.3109, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046827412569819217, |
|
"loss": 2.3722, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046735582550782066, |
|
"loss": 2.3676, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004664376358876939, |
|
"loss": 2.4991, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004655195599478448, |
|
"loss": 2.3987, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004646016007979207, |
|
"loss": 2.3807, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004636837615471741, |
|
"loss": 2.3651, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004627660453044508, |
|
"loss": 2.4073, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046184845517818014, |
|
"loss": 2.4485, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046093099427636433, |
|
"loss": 2.4144, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004600136657065677, |
|
"loss": 2.3495, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004590964725759066, |
|
"loss": 2.4432, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00045817941799103845, |
|
"loss": 2.3367, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004572625050581516, |
|
"loss": 2.4404, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004563457368829542, |
|
"loss": 2.4057, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00045542911657066467, |
|
"loss": 2.4284, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004545126472260001, |
|
"loss": 2.3815, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004535963319531663, |
|
"loss": 2.3614, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004526801738558476, |
|
"loss": 2.4519, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004517641760371955, |
|
"loss": 2.3831, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00045084834159981864, |
|
"loss": 2.346, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044993267364577285, |
|
"loss": 2.4427, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004490171752765494, |
|
"loss": 2.391, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044810184959306545, |
|
"loss": 2.451, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044718669969565317, |
|
"loss": 2.3838, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044627172868404923, |
|
"loss": 2.4119, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044535693965738467, |
|
"loss": 2.4274, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004444423357141738, |
|
"loss": 2.3029, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004435279199523043, |
|
"loss": 2.3777, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004426136954690262, |
|
"loss": 2.3987, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044169966536094184, |
|
"loss": 2.3581, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00044078583272399513, |
|
"loss": 2.3907, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00043987220065346114, |
|
"loss": 2.4248, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00043895877224393545, |
|
"loss": 2.3192, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.000438045550589324, |
|
"loss": 2.358, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00043713253878283233, |
|
"loss": 2.391, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00043621973991695524, |
|
"loss": 2.4907, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004353071570834662, |
|
"loss": 2.4146, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00043439479337340696, |
|
"loss": 2.3712, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004334826518770771, |
|
"loss": 2.4449, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004325707356840237, |
|
"loss": 2.3753, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004316590478830304, |
|
"loss": 2.3247, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004307475915621074, |
|
"loss": 2.3548, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004298363698084809, |
|
"loss": 2.3644, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004289253857085824, |
|
"loss": 2.4098, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00042801464234803864, |
|
"loss": 2.3517, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00042710414281166075, |
|
"loss": 2.439, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00042619389018343435, |
|
"loss": 2.4223, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004252838875465083, |
|
"loss": 2.3211, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004243741379831848, |
|
"loss": 2.401, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004234646445749093, |
|
"loss": 2.4459, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000422555410402259, |
|
"loss": 2.2829, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004216464385449335, |
|
"loss": 2.3647, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004207377320817436, |
|
"loss": 2.4144, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041982929409060135, |
|
"loss": 2.3446, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041892112764850927, |
|
"loss": 2.3324, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041801323583155037, |
|
"loss": 2.2845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041710562171487725, |
|
"loss": 2.3375, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000416198288372702, |
|
"loss": 2.3582, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004152912388782856, |
|
"loss": 2.2841, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041438447630392784, |
|
"loss": 2.3542, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041347800372095627, |
|
"loss": 2.4362, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00041257182419971654, |
|
"loss": 2.4016, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004116659408095616, |
|
"loss": 2.4134, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004107603566188412, |
|
"loss": 2.3195, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004098550746948918, |
|
"loss": 2.3902, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004089500981040261, |
|
"loss": 2.4356, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004080454299115224, |
|
"loss": 2.4718, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00040714107318161455, |
|
"loss": 2.3165, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004062370309774814, |
|
"loss": 2.3676, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00040533330636123647, |
|
"loss": 2.334, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00040442990239391766, |
|
"loss": 2.4078, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00040352682213547626, |
|
"loss": 2.3833, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004026240686447681, |
|
"loss": 2.3571, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004017216449795415, |
|
"loss": 2.3641, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00040081955419642764, |
|
"loss": 2.367, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003999177993509303, |
|
"loss": 2.3368, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00039901638349741555, |
|
"loss": 2.4514, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003981153096891009, |
|
"loss": 2.3297, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003972145809780457, |
|
"loss": 2.3843, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003963142004151401, |
|
"loss": 2.4125, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00039541417105009506, |
|
"loss": 2.3168, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00039451449593143215, |
|
"loss": 2.4065, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003936151781064731, |
|
"loss": 2.3478, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003927162206213291, |
|
"loss": 2.3989, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00039181762652089114, |
|
"loss": 2.4372, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003909193988488192, |
|
"loss": 2.3873, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003900215406475324, |
|
"loss": 2.4046, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00038912405495819785, |
|
"loss": 2.3524, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00038822694482072153, |
|
"loss": 2.3564, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00038733021327373696, |
|
"loss": 2.369, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003864338633545956, |
|
"loss": 2.3578, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000385537898099356, |
|
"loss": 2.3602, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000384642320542774, |
|
"loss": 2.4313, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003837471337182923, |
|
"loss": 2.3291, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003828523406580299, |
|
"loss": 2.3555, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00038195794439277225, |
|
"loss": 2.3163, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00038106394795196086, |
|
"loss": 2.3385, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00038017035436368254, |
|
"loss": 2.4187, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00037927716665466047, |
|
"loss": 2.4404, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00037838438785024216, |
|
"loss": 2.397, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00037749202097439057, |
|
"loss": 2.3734, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00037660006904967325, |
|
"loss": 2.4009, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00037570853509725236, |
|
"loss": 2.4276, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003748174221368742, |
|
"loss": 2.3821, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00037392673318685916, |
|
"loss": 2.321, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00037303647126409154, |
|
"loss": 2.3743, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00037214663938400913, |
|
"loss": 2.3692, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003712572405605932, |
|
"loss": 2.383, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003703682778063581, |
|
"loss": 2.3707, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036947975413234113, |
|
"loss": 2.3864, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036859167254809256, |
|
"loss": 2.3399, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036770403606166516, |
|
"loss": 2.469, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036681684767960387, |
|
"loss": 2.4053, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036593011040693613, |
|
"loss": 2.4008, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036504382724716135, |
|
"loss": 2.3744, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036415800120224057, |
|
"loss": 2.3632, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003632726352725869, |
|
"loss": 2.3286, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003623877324570548, |
|
"loss": 2.4072, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00036150329575292996, |
|
"loss": 2.2996, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003606193281559194, |
|
"loss": 2.4821, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00035973583266014133, |
|
"loss": 2.3883, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003588528122581146, |
|
"loss": 2.3579, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003579702699407492, |
|
"loss": 2.3023, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003570882086973355, |
|
"loss": 2.4152, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003562066315155345, |
|
"loss": 2.3282, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003553255413813676, |
|
"loss": 2.2976, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003544449412792069, |
|
"loss": 2.4261, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.000353564834191764, |
|
"loss": 2.4305, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.000352685223100081, |
|
"loss": 2.3566, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00035180611098351987, |
|
"loss": 2.3931, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00035092750081975265, |
|
"loss": 2.4228, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00035004939558475105, |
|
"loss": 2.3614, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003491717982527765, |
|
"loss": 2.3207, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003482947117963702, |
|
"loss": 2.3927, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003474181391863428, |
|
"loss": 2.3219, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003465420833917647, |
|
"loss": 2.4248, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00034566654737995555, |
|
"loss": 2.393, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00034479153411647457, |
|
"loss": 2.3521, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00034391704656511034, |
|
"loss": 2.3803, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00034304308768787076, |
|
"loss": 2.3621, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003421696604449729, |
|
"loss": 2.365, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00034129676779483346, |
|
"loss": 2.3525, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.000340424412694058, |
|
"loss": 2.3504, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00033955259809743157, |
|
"loss": 2.3886, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003386813269579085, |
|
"loss": 2.4235, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003378106022266022, |
|
"loss": 2.3589, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003369404268527754, |
|
"loss": 2.3473, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00033607080378383, |
|
"loss": 2.3863, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00033520173596529725, |
|
"loss": 2.3328, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003343332263408277, |
|
"loss": 2.3727, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003334652778521813, |
|
"loss": 2.3475, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003325978934392171, |
|
"loss": 2.3711, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003317310760398839, |
|
"loss": 2.2961, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00033086482859020957, |
|
"loss": 2.3632, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00032999915402429173, |
|
"loss": 2.3933, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003291340552742875, |
|
"loss": 2.3845, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003282695352704036, |
|
"loss": 2.3837, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00032740559694088655, |
|
"loss": 2.3652, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003265422432120125, |
|
"loss": 2.5405, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003256794770080778, |
|
"loss": 2.3815, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00032481730125138823, |
|
"loss": 2.4564, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003239557188622503, |
|
"loss": 2.367, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003230947327589602, |
|
"loss": 2.4146, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003222343458577945, |
|
"loss": 2.3144, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00032137456107300054, |
|
"loss": 2.3881, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003205153813167858, |
|
"loss": 2.3861, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00031965680949930854, |
|
"loss": 2.292, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00031879884852866803, |
|
"loss": 2.418, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00031794150131089435, |
|
"loss": 2.4622, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003170847707499387, |
|
"loss": 2.3496, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003162286597476638, |
|
"loss": 2.3934, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003153731712038335, |
|
"loss": 2.3758, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003145183080161036, |
|
"loss": 2.3755, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0003136640730800116, |
|
"loss": 2.339, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.000312810469288967, |
|
"loss": 2.3153, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00031195749953424163, |
|
"loss": 2.4115, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00031110516670495946, |
|
"loss": 2.3596, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00031025347368808775, |
|
"loss": 2.3859, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.000309402423368426, |
|
"loss": 2.3421, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030855201862859706, |
|
"loss": 2.4224, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0003077022623490371, |
|
"loss": 2.4121, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0003068531574079857, |
|
"loss": 2.3836, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030600470668147654, |
|
"loss": 2.3476, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030515691304332725, |
|
"loss": 2.3695, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0003043097793651299, |
|
"loss": 2.4237, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030346330851624095, |
|
"loss": 2.3247, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030261750336377203, |
|
"loss": 2.2692, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0003017723667725798, |
|
"loss": 2.3942, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030092790160525653, |
|
"loss": 2.4238, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00030008411072212027, |
|
"loss": 2.3459, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.000299240996981205, |
|
"loss": 2.3385, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002983985632382514, |
|
"loss": 2.4455, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002975568123466966, |
|
"loss": 2.3829, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00029671574715766524, |
|
"loss": 2.2501, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000295875370519959, |
|
"loss": 2.3536, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002950356852800475, |
|
"loss": 2.2893, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00029419669428205853, |
|
"loss": 2.3131, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00029335840036776836, |
|
"loss": 2.3992, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000292520806376592, |
|
"loss": 2.4069, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000291683915145574, |
|
"loss": 2.3701, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002908477295093784, |
|
"loss": 2.4106, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002900122523002792, |
|
"loss": 2.3763, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000289177486348151, |
|
"loss": 2.385, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00028834343448045936, |
|
"loss": 2.433, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00028751009952225116, |
|
"loss": 2.338, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002866774842961445, |
|
"loss": 2.3869, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00028584559162232054, |
|
"loss": 2.4237, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000285014424318512, |
|
"loss": 2.4219, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002841839851999958, |
|
"loss": 2.3598, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00028335427707958116, |
|
"loss": 2.3515, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002825253027676026, |
|
"loss": 2.37, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00028169706507190806, |
|
"loss": 2.3696, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00028086956679785127, |
|
"loss": 2.396, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002800428107482806, |
|
"loss": 2.4012, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002792167997235313, |
|
"loss": 2.3801, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002783915365214147, |
|
"loss": 2.3561, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027756702393720876, |
|
"loss": 2.3585, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027674326476364995, |
|
"loss": 2.394, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027592026179092176, |
|
"loss": 2.3956, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002750980178066472, |
|
"loss": 2.3338, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027427653559587776, |
|
"loss": 2.4041, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027345581794108555, |
|
"loss": 2.2587, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000272635867622152, |
|
"loss": 2.3868, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027181668741636046, |
|
"loss": 2.354, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000270998280098385, |
|
"loss": 2.2642, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00027018064844028244, |
|
"loss": 2.4138, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00026936379521148184, |
|
"loss": 2.3258, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002685477231787761, |
|
"loss": 2.4207, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00026773243510631146, |
|
"loss": 2.3677, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002669179337555794, |
|
"loss": 2.3905, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002661042218854063, |
|
"loss": 2.3724, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002652913022519449, |
|
"loss": 2.3516, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002644791776086638, |
|
"loss": 2.3496, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002636678507063397, |
|
"loss": 2.307, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002628573242930463, |
|
"loss": 2.402, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.000262047601114147, |
|
"loss": 2.3445, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002612386839122834, |
|
"loss": 2.3665, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002604305754273684, |
|
"loss": 2.3387, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00025962327839657435, |
|
"loss": 2.3526, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00025881679555432623, |
|
"loss": 2.4376, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002580111296322904, |
|
"loss": 2.4212, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002572062833593669, |
|
"loss": 2.3326, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00025640225946167895, |
|
"loss": 2.3383, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00025559906066256467, |
|
"loss": 2.4154, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.000254796689682567, |
|
"loss": 2.3926, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002539951492394256, |
|
"loss": 2.3123, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002531944420480662, |
|
"loss": 2.2899, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00025239457082059297, |
|
"loss": 2.3844, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00025159553826627825, |
|
"loss": 2.3151, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00025079734709155323, |
|
"loss": 2.4088, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002500000000000001, |
|
"loss": 2.4768, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00024920349969234114, |
|
"loss": 2.3978, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002484078488664313, |
|
"loss": 2.3491, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00024761305021724734, |
|
"loss": 2.3269, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002468191064368805, |
|
"loss": 2.3558, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002460260202145256, |
|
"loss": 2.3755, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00024523379423647333, |
|
"loss": 2.3858, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002444424311861006, |
|
"loss": 2.3476, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00024365193374386148, |
|
"loss": 2.3317, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00024286230458727754, |
|
"loss": 2.369, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00024207354639093026, |
|
"loss": 2.277, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00024128566182645024, |
|
"loss": 2.3561, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00024049865356250955, |
|
"loss": 2.3518, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023971252426481166, |
|
"loss": 2.3853, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023892727659608338, |
|
"loss": 2.3574, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023814291321606462, |
|
"loss": 2.2977, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023735943678150097, |
|
"loss": 2.4343, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023657684994613282, |
|
"loss": 2.3817, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023579515536068836, |
|
"loss": 2.4122, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023501435567287277, |
|
"loss": 2.3791, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002342344535273608, |
|
"loss": 2.3868, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002334554515657863, |
|
"loss": 2.4097, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.000232677352426735, |
|
"loss": 2.316, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023190015874573373, |
|
"loss": 2.3906, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023112387315524336, |
|
"loss": 2.3863, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023034849828464787, |
|
"loss": 2.3612, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00022957403676024762, |
|
"loss": 2.3062, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00022880049120524833, |
|
"loss": 2.3007, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002280278642397541, |
|
"loss": 2.2701, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002272561584807567, |
|
"loss": 2.4067, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002264853765421287, |
|
"loss": 2.339, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002257155210346124, |
|
"loss": 2.314, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002249465945658135, |
|
"loss": 2.3595, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00022417859974018967, |
|
"loss": 2.2792, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00022341153915904387, |
|
"loss": 2.3722, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00022264541542051398, |
|
"loss": 2.3774, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00022188023111956517, |
|
"loss": 2.2806, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00022111598884798022, |
|
"loss": 2.3556, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00022035269119435164, |
|
"loss": 2.3964, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00021959034074407163, |
|
"loss": 2.3563, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00021882894007932492, |
|
"loss": 2.3775, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00021806849177907845, |
|
"loss": 2.3476, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002173089984190739, |
|
"loss": 2.3872, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00021655046257181842, |
|
"loss": 2.2585, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00021579288680657538, |
|
"loss": 2.3441, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00021503627368935703, |
|
"loss": 2.3755, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00021428062578291414, |
|
"loss": 2.3433, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00021352594564672905, |
|
"loss": 2.3281, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00021277223583700527, |
|
"loss": 2.3531, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002120194989066605, |
|
"loss": 2.3857, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002112677374053164, |
|
"loss": 2.3441, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00021051695387929153, |
|
"loss": 2.3564, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020976715087159104, |
|
"loss": 2.396, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020901833092189982, |
|
"loss": 2.3913, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002082704965665721, |
|
"loss": 2.3628, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020752365033862463, |
|
"loss": 2.288, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020677779476772651, |
|
"loss": 2.3476, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020603293238019183, |
|
"loss": 2.3533, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020528906569897043, |
|
"loss": 2.281, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020454619724363994, |
|
"loss": 2.4064, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020380432953039603, |
|
"loss": 2.4079, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020306346507204564, |
|
"loss": 2.3546, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020232360637799685, |
|
"loss": 2.3198, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00020158475595425158, |
|
"loss": 2.3119, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00020084691630339614, |
|
"loss": 2.3479, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00020011008992459374, |
|
"loss": 2.3278, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001993742793135749, |
|
"loss": 2.3611, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001986394869626303, |
|
"loss": 2.2788, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019790571536060087, |
|
"loss": 2.3928, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019717296699287086, |
|
"loss": 2.3541, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019644124434135802, |
|
"loss": 2.3972, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019571054988450648, |
|
"loss": 2.4321, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001949808860972771, |
|
"loss": 2.3616, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001942522554511404, |
|
"loss": 2.3426, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019352466041406684, |
|
"loss": 2.309, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019279810345051957, |
|
"loss": 2.3185, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001920725870214458, |
|
"loss": 2.3605, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019134811358426757, |
|
"loss": 2.2825, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001906246855928751, |
|
"loss": 2.3287, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00018990230549761666, |
|
"loss": 2.3759, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001891809757452919, |
|
"loss": 2.3883, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00018846069877914224, |
|
"loss": 2.3734, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018774147703884365, |
|
"loss": 2.3386, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018702331296049742, |
|
"loss": 2.3961, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018630620897662275, |
|
"loss": 2.305, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001855901675161481, |
|
"loss": 2.2302, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018487519100440314, |
|
"loss": 2.3607, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018416128186310988, |
|
"loss": 2.405, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018344844251037574, |
|
"loss": 2.3226, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001827366753606839, |
|
"loss": 2.4195, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018202598282488652, |
|
"loss": 2.3763, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001813163673101953, |
|
"loss": 2.3062, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00018060783122017448, |
|
"loss": 2.3832, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017990037695473154, |
|
"loss": 2.2896, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017919400691011035, |
|
"loss": 2.3387, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017848872347888162, |
|
"loss": 2.357, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001777845290499363, |
|
"loss": 2.2424, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001770814260084761, |
|
"loss": 2.4304, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017637941673600665, |
|
"loss": 2.2946, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001756785036103282, |
|
"loss": 2.3486, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017497868900552888, |
|
"loss": 2.3464, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001742799752919753, |
|
"loss": 2.3703, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001735823648363059, |
|
"loss": 2.3988, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017288586000142149, |
|
"loss": 2.4077, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017219046314647873, |
|
"loss": 2.2285, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017149617662688072, |
|
"loss": 2.3668, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017080300279427035, |
|
"loss": 2.3386, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017011094399652104, |
|
"loss": 2.3591, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016942000257772998, |
|
"loss": 2.2742, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001687301808782095, |
|
"loss": 2.2877, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016804148123447898, |
|
"loss": 2.3301, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016735390597925765, |
|
"loss": 2.2378, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016666745744145617, |
|
"loss": 2.3547, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016598213794616863, |
|
"loss": 2.3288, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016529794981466527, |
|
"loss": 2.3687, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016461489536438378, |
|
"loss": 2.3908, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016393297690892256, |
|
"loss": 2.4032, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001632521967580315, |
|
"loss": 2.3021, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00016257255721760538, |
|
"loss": 2.2437, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00016189406058967577, |
|
"loss": 2.4009, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00016121670917240228, |
|
"loss": 2.4062, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00016054050526006635, |
|
"loss": 2.2743, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.000159865451143062, |
|
"loss": 2.3105, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001591915491078894, |
|
"loss": 2.369, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015851880143714575, |
|
"loss": 2.3745, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015784721040951899, |
|
"loss": 2.3623, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001571767782997786, |
|
"loss": 2.3515, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015650750737876935, |
|
"loss": 2.3739, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001558393999134023, |
|
"loss": 2.371, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015517245816664834, |
|
"loss": 2.2585, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015450668439752924, |
|
"loss": 2.3359, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001538420808611114, |
|
"loss": 2.4117, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015317864980849666, |
|
"loss": 2.3056, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001525163934868164, |
|
"loss": 2.3315, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015185531413922215, |
|
"loss": 2.3256, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001511954140048797, |
|
"loss": 2.3463, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001505366953189597, |
|
"loss": 2.3142, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014987916031263232, |
|
"loss": 2.317, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014922281121305736, |
|
"loss": 2.4335, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014856765024337843, |
|
"loss": 2.3708, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014791367962271423, |
|
"loss": 2.299, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014726090156615235, |
|
"loss": 2.3126, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001466093182847401, |
|
"loss": 2.3929, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014595893198547888, |
|
"loss": 2.3254, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014530974487131482, |
|
"loss": 2.3279, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014466175914113304, |
|
"loss": 2.3223, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014401497698974874, |
|
"loss": 2.3211, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014336940060790117, |
|
"loss": 2.3453, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014272503218224454, |
|
"loss": 2.4381, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014208187389534255, |
|
"loss": 2.3102, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014143992792565917, |
|
"loss": 2.3664, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00014079919644755258, |
|
"loss": 2.3132, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001401596816312673, |
|
"loss": 2.3598, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013952138564292643, |
|
"loss": 2.404, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013888431064452527, |
|
"loss": 2.4006, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013824845879392301, |
|
"loss": 2.3513, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001376138322448363, |
|
"loss": 2.2951, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013698043314683107, |
|
"loss": 2.3619, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013634826364531617, |
|
"loss": 2.3567, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013571732588153512, |
|
"loss": 2.324, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013508762199256004, |
|
"loss": 2.2771, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013445915411128295, |
|
"loss": 2.3974, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013383192436641, |
|
"loss": 2.4326, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013320593488245336, |
|
"loss": 2.3954, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001325811877797245, |
|
"loss": 2.3697, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013195768517432611, |
|
"loss": 2.4528, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001313354291781465, |
|
"loss": 2.2912, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001307144218988507, |
|
"loss": 2.2383, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00013009466543987487, |
|
"loss": 2.3279, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012947616190041783, |
|
"loss": 2.3716, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012885891337543538, |
|
"loss": 2.3219, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012824292195563148, |
|
"loss": 2.4057, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012762818972745292, |
|
"loss": 2.3609, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001270147187730809, |
|
"loss": 2.3141, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001264025111704249, |
|
"loss": 2.3393, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012579156899311485, |
|
"loss": 2.291, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012518189431049497, |
|
"loss": 2.3709, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012457348918761585, |
|
"loss": 2.2204, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012396635568522835, |
|
"loss": 2.3543, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012336049585977567, |
|
"loss": 2.3487, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012275591176338753, |
|
"loss": 2.3581, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012215260544387187, |
|
"loss": 2.3577, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012155057894470928, |
|
"loss": 2.3512, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012094983430504491, |
|
"loss": 2.2911, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012035037355968259, |
|
"loss": 2.3251, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00011975219873907678, |
|
"loss": 2.2616, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00011915531186932688, |
|
"loss": 2.3803, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011855971497216983, |
|
"loss": 2.3612, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011796541006497269, |
|
"loss": 2.3583, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011737239916072695, |
|
"loss": 2.3789, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011678068426804106, |
|
"loss": 2.457, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011619026739113331, |
|
"loss": 2.2434, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011560115052982606, |
|
"loss": 2.2616, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011501333567953758, |
|
"loss": 2.3304, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011442682483127687, |
|
"loss": 2.3475, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011384161997163533, |
|
"loss": 2.2778, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011325772308278132, |
|
"loss": 2.3904, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011267513614245289, |
|
"loss": 2.3907, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011209386112395076, |
|
"loss": 2.3336, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011151389999613248, |
|
"loss": 2.3793, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011093525472340472, |
|
"loss": 2.3641, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011035792726571776, |
|
"loss": 2.3915, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00010978191957855771, |
|
"loss": 2.3244, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00010920723361294099, |
|
"loss": 2.2922, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00010863387131540653, |
|
"loss": 2.4013, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001080618346280105, |
|
"loss": 2.3676, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010749112548831846, |
|
"loss": 2.3596, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001069217458293999, |
|
"loss": 2.3525, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010635369757982061, |
|
"loss": 2.3417, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010578698266363734, |
|
"loss": 2.358, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010522160300039007, |
|
"loss": 2.3709, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010465756050509661, |
|
"loss": 2.4521, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010409485708824506, |
|
"loss": 2.4123, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010353349465578859, |
|
"loss": 2.3052, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010297347510913746, |
|
"loss": 2.3366, |
|
"step": 1409 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010241480034515404, |
|
"loss": 2.3183, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010185747225614539, |
|
"loss": 2.3082, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010130149272985756, |
|
"loss": 2.37, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010074686364946823, |
|
"loss": 2.399, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00010019358689358154, |
|
"loss": 2.4144, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.964166433622068e-05, |
|
"loss": 2.3796, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.90910978468224e-05, |
|
"loss": 2.3717, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.854188929022984e-05, |
|
"loss": 2.3963, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.799404052668704e-05, |
|
"loss": 2.3167, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.744755341183181e-05, |
|
"loss": 2.4056, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.690242979669035e-05, |
|
"loss": 2.3068, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.635867152766997e-05, |
|
"loss": 2.3482, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.581628044655394e-05, |
|
"loss": 2.3084, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.527525839049406e-05, |
|
"loss": 2.3281, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.473560719200564e-05, |
|
"loss": 2.3111, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.419732867896047e-05, |
|
"loss": 2.3346, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.366042467458064e-05, |
|
"loss": 2.3117, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.312489699743309e-05, |
|
"loss": 2.3598, |
|
"step": 1427 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.259074746142238e-05, |
|
"loss": 2.3327, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.205797787578563e-05, |
|
"loss": 2.3157, |
|
"step": 1429 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.152659004508545e-05, |
|
"loss": 2.4291, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.099658576920466e-05, |
|
"loss": 2.3179, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.046796684333947e-05, |
|
"loss": 2.3465, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.994073505799399e-05, |
|
"loss": 2.395, |
|
"step": 1433 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.941489219897354e-05, |
|
"loss": 2.3563, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.889044004737939e-05, |
|
"loss": 2.3259, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.836738037960179e-05, |
|
"loss": 2.3736, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.78457149673152e-05, |
|
"loss": 2.3209, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.732544557747073e-05, |
|
"loss": 2.3401, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.680657397229158e-05, |
|
"loss": 2.3871, |
|
"step": 1439 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.628910190926598e-05, |
|
"loss": 2.3419, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.577303114114227e-05, |
|
"loss": 2.3418, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.525836341592175e-05, |
|
"loss": 2.4486, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.474510047685408e-05, |
|
"loss": 2.3244, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.423324406243016e-05, |
|
"loss": 2.3141, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.372279590637738e-05, |
|
"loss": 2.321, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.321375773765255e-05, |
|
"loss": 2.2767, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.27061312804372e-05, |
|
"loss": 2.3465, |
|
"step": 1447 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.21999182541307e-05, |
|
"loss": 2.3883, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.169512037334554e-05, |
|
"loss": 2.3339, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.119173934790019e-05, |
|
"loss": 2.3466, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.068977688281482e-05, |
|
"loss": 2.3427, |
|
"step": 1451 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.018923467830403e-05, |
|
"loss": 2.3627, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.969011442977237e-05, |
|
"loss": 2.3335, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.919241782780756e-05, |
|
"loss": 2.2868, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.869614655817576e-05, |
|
"loss": 2.3564, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.820130230181472e-05, |
|
"loss": 2.333, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.770788673482921e-05, |
|
"loss": 2.2877, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.721590152848473e-05, |
|
"loss": 2.3721, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.672534834920164e-05, |
|
"loss": 2.3642, |
|
"step": 1459 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.623622885855036e-05, |
|
"loss": 2.3208, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.574854471324461e-05, |
|
"loss": 2.3572, |
|
"step": 1461 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.526229756513686e-05, |
|
"loss": 2.3688, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.477748906121223e-05, |
|
"loss": 2.3688, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.429412084358261e-05, |
|
"loss": 2.319, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.381219454948196e-05, |
|
"loss": 2.3633, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.333171181125975e-05, |
|
"loss": 2.3446, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.285267425637621e-05, |
|
"loss": 2.295, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.237508350739663e-05, |
|
"loss": 2.3123, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.189894118198542e-05, |
|
"loss": 2.3276, |
|
"step": 1469 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.142424889290139e-05, |
|
"loss": 2.3237, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.095100824799145e-05, |
|
"loss": 2.3258, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.047922085018604e-05, |
|
"loss": 2.3153, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.00088882974929e-05, |
|
"loss": 2.3657, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.954001218299227e-05, |
|
"loss": 2.3485, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.907259409483097e-05, |
|
"loss": 2.3716, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.860663561621766e-05, |
|
"loss": 2.3926, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.814213832541655e-05, |
|
"loss": 2.3702, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.767910379574332e-05, |
|
"loss": 2.3756, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.721753359555833e-05, |
|
"loss": 2.3763, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.675742928826273e-05, |
|
"loss": 2.373, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.629879243229198e-05, |
|
"loss": 2.2572, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.584162458111148e-05, |
|
"loss": 2.3411, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.538592728321053e-05, |
|
"loss": 2.3552, |
|
"step": 1483 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.493170208209781e-05, |
|
"loss": 2.4024, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.447895051629549e-05, |
|
"loss": 2.3138, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.402767411933475e-05, |
|
"loss": 2.3135, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.357787441974967e-05, |
|
"loss": 2.271, |
|
"step": 1487 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.312955294107303e-05, |
|
"loss": 2.3715, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.268271120183022e-05, |
|
"loss": 2.3508, |
|
"step": 1489 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.22373507155351e-05, |
|
"loss": 2.3682, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.179347299068378e-05, |
|
"loss": 2.361, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.135107953075048e-05, |
|
"loss": 2.2785, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.091017183418157e-05, |
|
"loss": 2.3612, |
|
"step": 1493 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.047075139439151e-05, |
|
"loss": 2.3511, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.00328196997566e-05, |
|
"loss": 2.3548, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.9596378233611135e-05, |
|
"loss": 2.257, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.9161428474241274e-05, |
|
"loss": 2.2737, |
|
"step": 1497 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.872797189488105e-05, |
|
"loss": 2.3026, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.829600996370649e-05, |
|
"loss": 2.2523, |
|
"step": 1499 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.7865544143831274e-05, |
|
"loss": 2.2814, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.743657589330159e-05, |
|
"loss": 2.2883, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.700910666509096e-05, |
|
"loss": 2.2735, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.6583137907095686e-05, |
|
"loss": 2.3183, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.615867106212957e-05, |
|
"loss": 2.3637, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.573570756791957e-05, |
|
"loss": 2.2044, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.531424885710012e-05, |
|
"loss": 2.3988, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.4894296357209274e-05, |
|
"loss": 2.3839, |
|
"step": 1507 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.4475851490682904e-05, |
|
"loss": 2.3843, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.4058915674850664e-05, |
|
"loss": 2.3209, |
|
"step": 1509 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.36434903219305e-05, |
|
"loss": 2.3541, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.322957683902457e-05, |
|
"loss": 2.3996, |
|
"step": 1511 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.2817176628113807e-05, |
|
"loss": 2.3179, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.2406291086053826e-05, |
|
"loss": 2.3432, |
|
"step": 1513 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.199692160456948e-05, |
|
"loss": 2.367, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.158906957025078e-05, |
|
"loss": 2.368, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.118273636454784e-05, |
|
"loss": 2.3652, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.077792336376647e-05, |
|
"loss": 2.3421, |
|
"step": 1517 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.0374631939062943e-05, |
|
"loss": 2.3421, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.99728634564402e-05, |
|
"loss": 2.2835, |
|
"step": 1519 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.9572619276742393e-05, |
|
"loss": 2.3785, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.917390075565098e-05, |
|
"loss": 2.3257, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.8776709243679495e-05, |
|
"loss": 2.3212, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.838104608616961e-05, |
|
"loss": 2.3864, |
|
"step": 1523 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.798691262328586e-05, |
|
"loss": 2.3235, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.759431019001198e-05, |
|
"loss": 2.3822, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.720324011614535e-05, |
|
"loss": 2.3212, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.6813703726293676e-05, |
|
"loss": 2.451, |
|
"step": 1527 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.642570233986915e-05, |
|
"loss": 2.3598, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.6039237271085386e-05, |
|
"loss": 2.2875, |
|
"step": 1529 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.565430982895175e-05, |
|
"loss": 2.4021, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.52709213172699e-05, |
|
"loss": 2.3524, |
|
"step": 1531 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.488907303462841e-05, |
|
"loss": 2.3779, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.4508766274399424e-05, |
|
"loss": 2.3639, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.4130002324733355e-05, |
|
"loss": 2.3627, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.375278246855513e-05, |
|
"loss": 2.3944, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.3377107983559596e-05, |
|
"loss": 2.2929, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.300298014220699e-05, |
|
"loss": 2.3835, |
|
"step": 1537 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.2630400211719264e-05, |
|
"loss": 2.2726, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.2259369454074935e-05, |
|
"loss": 2.312, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.188988912600566e-05, |
|
"loss": 2.3564, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.152196047899126e-05, |
|
"loss": 2.4055, |
|
"step": 1541 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.115558475925601e-05, |
|
"loss": 2.3959, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.079076320776426e-05, |
|
"loss": 2.3602, |
|
"step": 1543 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.042749706021581e-05, |
|
"loss": 2.362, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.0065787547042544e-05, |
|
"loss": 2.2404, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9705635893403594e-05, |
|
"loss": 2.3495, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.934704331918137e-05, |
|
"loss": 2.3613, |
|
"step": 1547 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.89900110389777e-05, |
|
"loss": 2.255, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.863454026210911e-05, |
|
"loss": 2.3231, |
|
"step": 1549 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.828063219260347e-05, |
|
"loss": 2.3597, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.792828802919529e-05, |
|
"loss": 2.4335, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.757750896532214e-05, |
|
"loss": 2.3403, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.72282961891201e-05, |
|
"loss": 2.4333, |
|
"step": 1553 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.688065088342041e-05, |
|
"loss": 2.377, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.653457422574458e-05, |
|
"loss": 2.3642, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.619006738830144e-05, |
|
"loss": 2.4349, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.584713153798214e-05, |
|
"loss": 2.3163, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.550576783635712e-05, |
|
"loss": 2.3527, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.516597743967126e-05, |
|
"loss": 2.3009, |
|
"step": 1559 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.4827761498840924e-05, |
|
"loss": 2.3623, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.449112115944919e-05, |
|
"loss": 2.2798, |
|
"step": 1561 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.415605756174267e-05, |
|
"loss": 2.3023, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.382257184062709e-05, |
|
"loss": 2.4368, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.34906651256639e-05, |
|
"loss": 2.3222, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.316033854106604e-05, |
|
"loss": 2.2483, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.283159320569451e-05, |
|
"loss": 2.2451, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.2504430233054274e-05, |
|
"loss": 2.3724, |
|
"step": 1567 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.217885073129067e-05, |
|
"loss": 2.3633, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.1854855803185645e-05, |
|
"loss": 2.408, |
|
"step": 1569 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.153244654615406e-05, |
|
"loss": 2.3808, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.121162405223954e-05, |
|
"loss": 2.4206, |
|
"step": 1571 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.089238940811162e-05, |
|
"loss": 2.3333, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.057474369506108e-05, |
|
"loss": 2.306, |
|
"step": 1573 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.025868798899728e-05, |
|
"loss": 2.2793, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.9944223360443446e-05, |
|
"loss": 2.4081, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9631350874534225e-05, |
|
"loss": 2.3331, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9320071591010854e-05, |
|
"loss": 2.3505, |
|
"step": 1577 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9010386564218615e-05, |
|
"loss": 2.2429, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8702296843102715e-05, |
|
"loss": 2.2545, |
|
"step": 1579 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8395803471204684e-05, |
|
"loss": 2.3952, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8090907486659322e-05, |
|
"loss": 2.3529, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7787609922190427e-05, |
|
"loss": 2.2856, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7485911805108178e-05, |
|
"loss": 2.3722, |
|
"step": 1583 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.718581415730481e-05, |
|
"loss": 2.3342, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.688731799525196e-05, |
|
"loss": 2.3142, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6590424329996367e-05, |
|
"loss": 2.2973, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6295134167157342e-05, |
|
"loss": 2.3004, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6001448506922544e-05, |
|
"loss": 2.3562, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.5709368344045248e-05, |
|
"loss": 2.2182, |
|
"step": 1589 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.5418894667840364e-05, |
|
"loss": 2.2891, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.5130028462181876e-05, |
|
"loss": 2.2727, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.4842770705498563e-05, |
|
"loss": 2.3455, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.455712237077157e-05, |
|
"loss": 2.3612, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.427308442553028e-05, |
|
"loss": 2.2578, |
|
"step": 1594 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.399065783185006e-05, |
|
"loss": 2.2582, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3709843546347864e-05, |
|
"loss": 2.3917, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3430642520179847e-05, |
|
"loss": 2.2371, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3153055699037583e-05, |
|
"loss": 2.3884, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.2877084023145424e-05, |
|
"loss": 2.2932, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.26027284272568e-05, |
|
"loss": 2.3436, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.232998984065132e-05, |
|
"loss": 2.3251, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.2058869187131515e-05, |
|
"loss": 2.3943, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.1789367385019854e-05, |
|
"loss": 2.3229, |
|
"step": 1603 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.152148534715537e-05, |
|
"loss": 2.3256, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.1255223980891027e-05, |
|
"loss": 2.3357, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0990584188089966e-05, |
|
"loss": 2.36, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0727566865123228e-05, |
|
"loss": 2.4062, |
|
"step": 1607 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.046617290286601e-05, |
|
"loss": 2.3019, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0206403186695198e-05, |
|
"loss": 2.3427, |
|
"step": 1609 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.994825859648597e-05, |
|
"loss": 2.3014, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.969174000660906e-05, |
|
"loss": 2.3167, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.9436848285927787e-05, |
|
"loss": 2.2969, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.918358429779482e-05, |
|
"loss": 2.3046, |
|
"step": 1613 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.893194890004979e-05, |
|
"loss": 2.3401, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.868194294501574e-05, |
|
"loss": 2.2342, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.843356727949691e-05, |
|
"loss": 2.3481, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8186822744775234e-05, |
|
"loss": 2.2885, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.794171017660795e-05, |
|
"loss": 2.3167, |
|
"step": 1618 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7698230405224504e-05, |
|
"loss": 2.3262, |
|
"step": 1619 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.745638425532392e-05, |
|
"loss": 2.358, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7216172546071885e-05, |
|
"loss": 2.3507, |
|
"step": 1621 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6977596091098113e-05, |
|
"loss": 2.3153, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6740655698493313e-05, |
|
"loss": 2.2792, |
|
"step": 1623 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6505352170806787e-05, |
|
"loss": 2.2654, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.627168630504339e-05, |
|
"loss": 2.3947, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.603965889266118e-05, |
|
"loss": 2.3774, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5809270719568335e-05, |
|
"loss": 2.3866, |
|
"step": 1627 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5580522566121024e-05, |
|
"loss": 2.3637, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.535341520712008e-05, |
|
"loss": 2.3164, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.512794941180906e-05, |
|
"loss": 2.279, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4904125943871028e-05, |
|
"loss": 2.3181, |
|
"step": 1631 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4681945561426547e-05, |
|
"loss": 2.3144, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.446140901703058e-05, |
|
"loss": 2.4334, |
|
"step": 1633 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4242517057670434e-05, |
|
"loss": 2.364, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.402527042476276e-05, |
|
"loss": 2.3259, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.380966985415144e-05, |
|
"loss": 2.3583, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3595716076104658e-05, |
|
"loss": 2.3553, |
|
"step": 1637 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3383409815313108e-05, |
|
"loss": 2.2715, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3172751790886672e-05, |
|
"loss": 2.3486, |
|
"step": 1639 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.2963742716352812e-05, |
|
"loss": 2.3027, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.2756383299653452e-05, |
|
"loss": 2.3052, |
|
"step": 1641 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.2550674243143156e-05, |
|
"loss": 2.3633, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.2346616243586294e-05, |
|
"loss": 2.3606, |
|
"step": 1643 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.2144209992155042e-05, |
|
"loss": 2.3093, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.1943456174426825e-05, |
|
"loss": 2.364, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1744355470381996e-05, |
|
"loss": 2.4326, |
|
"step": 1646 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1546908554401658e-05, |
|
"loss": 2.3876, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.135111609526529e-05, |
|
"loss": 2.3325, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1156978756148462e-05, |
|
"loss": 2.3742, |
|
"step": 1649 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0964497194620726e-05, |
|
"loss": 2.3834, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.077367206264318e-05, |
|
"loss": 2.3848, |
|
"step": 1651 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0584504006566465e-05, |
|
"loss": 2.3192, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0396993667128318e-05, |
|
"loss": 2.4382, |
|
"step": 1653 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0211141679451753e-05, |
|
"loss": 2.2944, |
|
"step": 1654 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0026948673042768e-05, |
|
"loss": 2.3364, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.84441527178781e-06, |
|
"loss": 2.368, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.663542093952483e-06, |
|
"loss": 2.3153, |
|
"step": 1657 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.484329752178555e-06, |
|
"loss": 2.3558, |
|
"step": 1658 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.306778853482744e-06, |
|
"loss": 2.348, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.130889999253822e-06, |
|
"loss": 2.3848, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.95666378525134e-06, |
|
"loss": 2.3773, |
|
"step": 1661 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.784100801602912e-06, |
|
"loss": 2.3582, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.613201632802992e-06, |
|
"loss": 2.3574, |
|
"step": 1663 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.443966857710095e-06, |
|
"loss": 2.3734, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.276397049545359e-06, |
|
"loss": 2.291, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.110492775890433e-06, |
|
"loss": 2.3514, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.946254598685643e-06, |
|
"loss": 2.3549, |
|
"step": 1667 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.783683074227943e-06, |
|
"loss": 2.3524, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.622778753169079e-06, |
|
"loss": 2.3111, |
|
"step": 1669 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.463542180513871e-06, |
|
"loss": 2.323, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.305973895618157e-06, |
|
"loss": 2.3242, |
|
"step": 1671 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.150074432187015e-06, |
|
"loss": 2.3144, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.995844318273104e-06, |
|
"loss": 2.3182, |
|
"step": 1673 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.8432840762747695e-06, |
|
"loss": 2.3319, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.692394222934272e-06, |
|
"loss": 2.3763, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.543175269335844e-06, |
|
"loss": 2.3976, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.395627720904518e-06, |
|
"loss": 2.341, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.249752077403636e-06, |
|
"loss": 2.3106, |
|
"step": 1678 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.10554883293396e-06, |
|
"loss": 2.3996, |
|
"step": 1679 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.963018475931281e-06, |
|
"loss": 2.3723, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.8221614891653115e-06, |
|
"loss": 2.3411, |
|
"step": 1681 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.682978349737799e-06, |
|
"loss": 2.2581, |
|
"step": 1682 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.54546952908086e-06, |
|
"loss": 2.3132, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.409635492955478e-06, |
|
"loss": 2.2714, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.275476701450066e-06, |
|
"loss": 2.3827, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.142993608978519e-06, |
|
"loss": 2.3675, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.012186664279106e-06, |
|
"loss": 2.3512, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.883056310412581e-06, |
|
"loss": 2.2707, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.755602984761132e-06, |
|
"loss": 2.3824, |
|
"step": 1689 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.62982711902632e-06, |
|
"loss": 2.307, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.505729139228087e-06, |
|
"loss": 2.403, |
|
"step": 1691 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.383309465703145e-06, |
|
"loss": 2.2792, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.262568513103415e-06, |
|
"loss": 2.2435, |
|
"step": 1693 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.143506690395038e-06, |
|
"loss": 2.3489, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.026124400856479e-06, |
|
"loss": 2.3703, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.910422042077421e-06, |
|
"loss": 2.3224, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.79640000595749e-06, |
|
"loss": 2.4044, |
|
"step": 1697 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.6840586787048045e-06, |
|
"loss": 2.3773, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.573398440834541e-06, |
|
"loss": 2.3467, |
|
"step": 1699 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.464419667168095e-06, |
|
"loss": 2.3637, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.357122726831252e-06, |
|
"loss": 2.3537, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.2515079832533543e-06, |
|
"loss": 2.3654, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.1475757941658023e-06, |
|
"loss": 2.329, |
|
"step": 1703 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.0453265116009987e-06, |
|
"loss": 2.3691, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.9447604818911844e-06, |
|
"loss": 2.3862, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.845878045667161e-06, |
|
"loss": 2.3251, |
|
"step": 1706 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.748679537857013e-06, |
|
"loss": 2.3459, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.6531652876853884e-06, |
|
"loss": 2.2902, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.559335618671943e-06, |
|
"loss": 2.3358, |
|
"step": 1709 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.4671908486305073e-06, |
|
"loss": 2.4531, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.3767312896679214e-06, |
|
"loss": 2.3742, |
|
"step": 1711 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.2879572481829814e-06, |
|
"loss": 2.4129, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.200869024865326e-06, |
|
"loss": 2.2343, |
|
"step": 1713 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.115466914694719e-06, |
|
"loss": 2.414, |
|
"step": 1714 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.0317512069394917e-06, |
|
"loss": 2.3059, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.949722185156266e-06, |
|
"loss": 2.2761, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.8693801271884026e-06, |
|
"loss": 2.3328, |
|
"step": 1717 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.7907253051653305e-06, |
|
"loss": 2.3489, |
|
"step": 1718 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.7137579855016072e-06, |
|
"loss": 2.2278, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.6384784288959175e-06, |
|
"loss": 2.2491, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5648868903304637e-06, |
|
"loss": 2.3181, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.4929836190696322e-06, |
|
"loss": 2.3198, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.4227688586596066e-06, |
|
"loss": 2.3803, |
|
"step": 1723 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3542428469273116e-06, |
|
"loss": 2.3766, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2874058159796366e-06, |
|
"loss": 2.4288, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2222579922026577e-06, |
|
"loss": 2.4122, |
|
"step": 1726 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1587995962608623e-06, |
|
"loss": 2.3623, |
|
"step": 1727 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0970308430964805e-06, |
|
"loss": 2.381, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0369519419286543e-06, |
|
"loss": 2.4029, |
|
"step": 1729 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.785630962527159e-07, |
|
"loss": 2.3803, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.218645038396312e-07, |
|
"loss": 2.3646, |
|
"step": 1731 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.668563567352238e-07, |
|
"loss": 2.2831, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.135388412595646e-07, |
|
"loss": 2.3445, |
|
"step": 1733 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.619121380063043e-07, |
|
"loss": 2.3601, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.119764218420644e-07, |
|
"loss": 2.299, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.637318619058807e-07, |
|
"loss": 2.3585, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.17178621608594e-07, |
|
"loss": 2.2829, |
|
"step": 1737 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.723168586324046e-07, |
|
"loss": 2.3443, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.291467249302074e-07, |
|
"loss": 2.2769, |
|
"step": 1739 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.876683667249804e-07, |
|
"loss": 2.31, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.4788192450961886e-07, |
|
"loss": 2.335, |
|
"step": 1741 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.097875330462131e-07, |
|
"loss": 2.3004, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.733853213656047e-07, |
|
"loss": 2.3632, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.386754127670533e-07, |
|
"loss": 2.3078, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.056579248176816e-07, |
|
"loss": 2.3236, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7433296935219785e-07, |
|
"loss": 2.3353, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.4470065247250706e-07, |
|
"loss": 2.4458, |
|
"step": 1747 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.16761074547267e-07, |
|
"loss": 2.3492, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.905143302116108e-07, |
|
"loss": 2.3494, |
|
"step": 1749 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6596050836681365e-07, |
|
"loss": 2.2391, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.4309969218001538e-07, |
|
"loss": 2.3532, |
|
"step": 1751 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2193195908388744e-07, |
|
"loss": 2.294, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0245738077641065e-07, |
|
"loss": 2.2277, |
|
"step": 1753 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.467602322065338e-08, |
|
"loss": 2.3152, |
|
"step": 1754 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.858794664449386e-08, |
|
"loss": 2.2876, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.419320554034268e-08, |
|
"loss": 2.3495, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.149184866519828e-08, |
|
"loss": 2.2936, |
|
"step": 1757 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.048391904031389e-08, |
|
"loss": 2.2783, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.1169453950975472e-08, |
|
"loss": 2.3406, |
|
"step": 1759 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3548484946501737e-08, |
|
"loss": 2.3872, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1760, |
|
"total_flos": 1.4654118649502106e+17, |
|
"train_loss": 2.4962065879594197, |
|
"train_runtime": 20089.3142, |
|
"train_samples_per_second": 5.607, |
|
"train_steps_per_second": 0.088 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 1.4654118649502106e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|