|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 686, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9956268221574345e-05, |
|
"loss": 5.3629, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.991253644314869e-05, |
|
"loss": 3.9386, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9868804664723033e-05, |
|
"loss": 3.4456, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9825072886297377e-05, |
|
"loss": 3.3224, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.978134110787172e-05, |
|
"loss": 2.9937, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9737609329446064e-05, |
|
"loss": 2.8936, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.969387755102041e-05, |
|
"loss": 2.6102, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9650145772594756e-05, |
|
"loss": 2.5955, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9606413994169096e-05, |
|
"loss": 2.2454, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.956268221574344e-05, |
|
"loss": 2.294, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9518950437317784e-05, |
|
"loss": 2.01, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9475218658892128e-05, |
|
"loss": 2.1138, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9431486880466475e-05, |
|
"loss": 1.8897, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9387755102040816e-05, |
|
"loss": 1.8003, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9344023323615163e-05, |
|
"loss": 1.7822, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9300291545189504e-05, |
|
"loss": 1.8079, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.925655976676385e-05, |
|
"loss": 1.9259, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9212827988338192e-05, |
|
"loss": 1.5808, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9169096209912536e-05, |
|
"loss": 1.621, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9125364431486883e-05, |
|
"loss": 1.651, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9081632653061224e-05, |
|
"loss": 1.5975, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.903790087463557e-05, |
|
"loss": 1.5772, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.899416909620991e-05, |
|
"loss": 1.7805, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.895043731778426e-05, |
|
"loss": 1.5725, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.89067055393586e-05, |
|
"loss": 1.4716, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.8862973760932947e-05, |
|
"loss": 1.6262, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.881924198250729e-05, |
|
"loss": 1.536, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.877551020408163e-05, |
|
"loss": 1.5282, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.873177842565598e-05, |
|
"loss": 1.506, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.868804664723032e-05, |
|
"loss": 1.4047, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.8644314868804667e-05, |
|
"loss": 1.5451, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.860058309037901e-05, |
|
"loss": 1.2876, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8556851311953354e-05, |
|
"loss": 1.3083, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.85131195335277e-05, |
|
"loss": 1.3228, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8469387755102042e-05, |
|
"loss": 1.2497, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8425655976676386e-05, |
|
"loss": 1.2854, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8381924198250727e-05, |
|
"loss": 1.332, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8338192419825074e-05, |
|
"loss": 1.2977, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8294460641399418e-05, |
|
"loss": 1.1913, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8250728862973762e-05, |
|
"loss": 1.4206, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8206997084548106e-05, |
|
"loss": 1.3032, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.816326530612245e-05, |
|
"loss": 1.1998, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8119533527696794e-05, |
|
"loss": 1.1315, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8075801749271134e-05, |
|
"loss": 1.199, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8032069970845482e-05, |
|
"loss": 1.1658, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.7988338192419826e-05, |
|
"loss": 1.1449, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.794460641399417e-05, |
|
"loss": 1.1687, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.7900874635568514e-05, |
|
"loss": 1.2565, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.7857142857142858e-05, |
|
"loss": 1.1276, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.78134110787172e-05, |
|
"loss": 1.1296, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.776967930029155e-05, |
|
"loss": 1.2696, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.772594752186589e-05, |
|
"loss": 1.3116, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.7682215743440233e-05, |
|
"loss": 1.2603, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7638483965014577e-05, |
|
"loss": 1.1573, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.759475218658892e-05, |
|
"loss": 1.1059, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7551020408163265e-05, |
|
"loss": 1.0841, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.750728862973761e-05, |
|
"loss": 1.1224, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7463556851311957e-05, |
|
"loss": 1.0036, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7419825072886297e-05, |
|
"loss": 1.1716, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7376093294460644e-05, |
|
"loss": 1.1219, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7332361516034985e-05, |
|
"loss": 1.3287, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.728862973760933e-05, |
|
"loss": 1.1589, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7244897959183673e-05, |
|
"loss": 1.1281, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7201166180758017e-05, |
|
"loss": 1.0111, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7157434402332364e-05, |
|
"loss": 1.0364, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7113702623906705e-05, |
|
"loss": 0.9568, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7069970845481052e-05, |
|
"loss": 1.0818, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7026239067055393e-05, |
|
"loss": 1.3587, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.698250728862974e-05, |
|
"loss": 1.0199, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.6938775510204084e-05, |
|
"loss": 1.1711, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.6895043731778424e-05, |
|
"loss": 1.0558, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.6851311953352772e-05, |
|
"loss": 1.1889, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.6807580174927112e-05, |
|
"loss": 0.8719, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.676384839650146e-05, |
|
"loss": 1.0826, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.67201166180758e-05, |
|
"loss": 0.8779, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.6676384839650148e-05, |
|
"loss": 1.0484, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.663265306122449e-05, |
|
"loss": 1.2392, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.6588921282798835e-05, |
|
"loss": 1.0915, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.654518950437318e-05, |
|
"loss": 0.9508, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.650145772594752e-05, |
|
"loss": 1.0686, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6457725947521867e-05, |
|
"loss": 1.0613, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6413994169096208e-05, |
|
"loss": 0.9126, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6370262390670555e-05, |
|
"loss": 1.2267, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.63265306122449e-05, |
|
"loss": 0.9892, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6282798833819243e-05, |
|
"loss": 0.9648, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6239067055393587e-05, |
|
"loss": 0.9477, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.619533527696793e-05, |
|
"loss": 1.0255, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6151603498542275e-05, |
|
"loss": 0.9431, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.610787172011662e-05, |
|
"loss": 0.9383, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6064139941690963e-05, |
|
"loss": 1.0558, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6020408163265307e-05, |
|
"loss": 0.9906, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.597667638483965e-05, |
|
"loss": 1.0479, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.5932944606413995e-05, |
|
"loss": 1.0124, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.588921282798834e-05, |
|
"loss": 0.8342, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.5845481049562683e-05, |
|
"loss": 1.0599, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.580174927113703e-05, |
|
"loss": 1.0933, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.575801749271137e-05, |
|
"loss": 1.0057, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5714285714285714e-05, |
|
"loss": 1.0297, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.567055393586006e-05, |
|
"loss": 1.1918, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5626822157434402e-05, |
|
"loss": 0.9445, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5583090379008746e-05, |
|
"loss": 1.1254, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.553935860058309e-05, |
|
"loss": 0.9527, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5495626822157438e-05, |
|
"loss": 0.8619, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5451895043731778e-05, |
|
"loss": 0.9988, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5408163265306125e-05, |
|
"loss": 0.7942, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5364431486880466e-05, |
|
"loss": 1.0989, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.532069970845481e-05, |
|
"loss": 0.9513, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5276967930029154e-05, |
|
"loss": 1.0775, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5233236151603498e-05, |
|
"loss": 1.1696, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5189504373177845e-05, |
|
"loss": 1.0216, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5145772594752186e-05, |
|
"loss": 1.03, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5102040816326533e-05, |
|
"loss": 0.9126, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5058309037900874e-05, |
|
"loss": 0.8499, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.501457725947522e-05, |
|
"loss": 0.8761, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4970845481049565e-05, |
|
"loss": 1.0023, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4927113702623906e-05, |
|
"loss": 0.9489, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4883381924198253e-05, |
|
"loss": 0.9472, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4839650145772593e-05, |
|
"loss": 0.9639, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.479591836734694e-05, |
|
"loss": 1.0146, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.475218658892128e-05, |
|
"loss": 1.0069, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.470845481049563e-05, |
|
"loss": 1.0586, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4664723032069973e-05, |
|
"loss": 1.2354, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4620991253644317e-05, |
|
"loss": 0.9995, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.457725947521866e-05, |
|
"loss": 0.9738, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4533527696793e-05, |
|
"loss": 1.0605, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.448979591836735e-05, |
|
"loss": 0.9859, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4446064139941692e-05, |
|
"loss": 1.0085, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4402332361516036e-05, |
|
"loss": 0.8942, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.435860058309038e-05, |
|
"loss": 0.9521, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4314868804664724e-05, |
|
"loss": 0.9921, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4271137026239068e-05, |
|
"loss": 0.985, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.422740524781341e-05, |
|
"loss": 0.8275, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.4183673469387756e-05, |
|
"loss": 1.0239, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.41399416909621e-05, |
|
"loss": 0.9122, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.4096209912536444e-05, |
|
"loss": 0.912, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.4052478134110788e-05, |
|
"loss": 0.9607, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.4008746355685132e-05, |
|
"loss": 1.101, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3965014577259476e-05, |
|
"loss": 0.9501, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.392128279883382e-05, |
|
"loss": 0.8686, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3877551020408164e-05, |
|
"loss": 0.9697, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3833819241982508e-05, |
|
"loss": 0.9529, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.379008746355685e-05, |
|
"loss": 1.0023, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.3746355685131196e-05, |
|
"loss": 0.9224, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.370262390670554e-05, |
|
"loss": 0.8481, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.3658892128279883e-05, |
|
"loss": 0.8655, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3615160349854227e-05, |
|
"loss": 0.9538, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.357142857142857e-05, |
|
"loss": 0.9949, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.352769679300292e-05, |
|
"loss": 0.9171, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.348396501457726e-05, |
|
"loss": 0.8127, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.3440233236151603e-05, |
|
"loss": 0.9542, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.3396501457725947e-05, |
|
"loss": 0.8685, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.335276967930029e-05, |
|
"loss": 1.0402, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.330903790087464e-05, |
|
"loss": 0.978, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.326530612244898e-05, |
|
"loss": 0.7553, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.3221574344023326e-05, |
|
"loss": 0.9466, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.3177842565597667e-05, |
|
"loss": 0.9465, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3134110787172014e-05, |
|
"loss": 0.8261, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3090379008746355e-05, |
|
"loss": 0.957, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.30466472303207e-05, |
|
"loss": 0.7844, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.3002915451895046e-05, |
|
"loss": 0.8258, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2959183673469387e-05, |
|
"loss": 1.0157, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2915451895043734e-05, |
|
"loss": 0.9961, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2871720116618074e-05, |
|
"loss": 0.8449, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2827988338192422e-05, |
|
"loss": 0.8206, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2784256559766762e-05, |
|
"loss": 0.815, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.274052478134111e-05, |
|
"loss": 0.9007, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.2696793002915454e-05, |
|
"loss": 0.8002, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.2653061224489794e-05, |
|
"loss": 0.8584, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.260932944606414e-05, |
|
"loss": 0.8984, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.2565597667638482e-05, |
|
"loss": 0.9296, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.252186588921283e-05, |
|
"loss": 0.9471, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.2478134110787173e-05, |
|
"loss": 0.9958, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.2434402332361517e-05, |
|
"loss": 0.9842, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.239067055393586e-05, |
|
"loss": 0.9085, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.2346938775510205e-05, |
|
"loss": 0.8902, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.230320699708455e-05, |
|
"loss": 1.108, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.225947521865889e-05, |
|
"loss": 0.8656, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.2215743440233237e-05, |
|
"loss": 0.9684, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.217201166180758e-05, |
|
"loss": 1.0054, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.2128279883381925e-05, |
|
"loss": 0.9552, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.208454810495627e-05, |
|
"loss": 0.7809, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.2040816326530613e-05, |
|
"loss": 1.0736, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1997084548104957e-05, |
|
"loss": 0.8351, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.19533527696793e-05, |
|
"loss": 1.0844, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1909620991253645e-05, |
|
"loss": 0.8503, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.186588921282799e-05, |
|
"loss": 0.9959, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.1822157434402333e-05, |
|
"loss": 1.0989, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.1778425655976677e-05, |
|
"loss": 0.9763, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.173469387755102e-05, |
|
"loss": 1.0347, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.1690962099125364e-05, |
|
"loss": 0.9371, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1647230320699712e-05, |
|
"loss": 0.7817, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1603498542274052e-05, |
|
"loss": 0.9467, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.15597667638484e-05, |
|
"loss": 0.9691, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.151603498542274e-05, |
|
"loss": 0.7149, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1472303206997084e-05, |
|
"loss": 0.7316, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 0.7645, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1384839650145772e-05, |
|
"loss": 0.7922, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.134110787172012e-05, |
|
"loss": 0.9184, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.129737609329446e-05, |
|
"loss": 0.8611, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1253644314868807e-05, |
|
"loss": 0.8158, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1209912536443148e-05, |
|
"loss": 1.0274, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1166180758017495e-05, |
|
"loss": 0.9442, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1122448979591836e-05, |
|
"loss": 1.1595, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.107871720116618e-05, |
|
"loss": 0.8674, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1034985422740527e-05, |
|
"loss": 0.9162, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0991253644314868e-05, |
|
"loss": 0.8482, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0947521865889215e-05, |
|
"loss": 0.8585, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0903790087463556e-05, |
|
"loss": 0.9633, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0860058309037903e-05, |
|
"loss": 0.9125, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0816326530612247e-05, |
|
"loss": 0.7642, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.077259475218659e-05, |
|
"loss": 0.8236, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0728862973760935e-05, |
|
"loss": 0.7912, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0685131195335275e-05, |
|
"loss": 0.8434, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0641399416909623e-05, |
|
"loss": 0.8584, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0597667638483963e-05, |
|
"loss": 0.8301, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.055393586005831e-05, |
|
"loss": 0.8389, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0510204081632654e-05, |
|
"loss": 0.9902, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0466472303207e-05, |
|
"loss": 0.8553, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0422740524781342e-05, |
|
"loss": 0.872, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0379008746355683e-05, |
|
"loss": 0.8447, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.033527696793003e-05, |
|
"loss": 0.8772, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.029154518950437e-05, |
|
"loss": 0.8157, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.0247813411078718e-05, |
|
"loss": 0.7805, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.0204081632653062e-05, |
|
"loss": 0.708, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0160349854227406e-05, |
|
"loss": 0.7722, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.011661807580175e-05, |
|
"loss": 0.9642, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0072886297376094e-05, |
|
"loss": 0.7912, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0029154518950438e-05, |
|
"loss": 1.0257, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9985422740524782e-05, |
|
"loss": 0.9954, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9941690962099126e-05, |
|
"loss": 0.9803, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.989795918367347e-05, |
|
"loss": 0.7859, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9854227405247814e-05, |
|
"loss": 0.8381, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9810495626822158e-05, |
|
"loss": 0.6901, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.97667638483965e-05, |
|
"loss": 1.0198, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9723032069970846e-05, |
|
"loss": 0.9632, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9679300291545193e-05, |
|
"loss": 0.7811, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9635568513119533e-05, |
|
"loss": 0.776, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9591836734693877e-05, |
|
"loss": 0.7588, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.954810495626822e-05, |
|
"loss": 0.8946, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9504373177842565e-05, |
|
"loss": 0.8895, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.946064139941691e-05, |
|
"loss": 0.9084, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.9416909620991253e-05, |
|
"loss": 0.8147, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.93731778425656e-05, |
|
"loss": 0.8603, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.932944606413994e-05, |
|
"loss": 0.8547, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.928571428571429e-05, |
|
"loss": 0.7791, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.924198250728863e-05, |
|
"loss": 0.8709, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.9198250728862973e-05, |
|
"loss": 0.7485, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.915451895043732e-05, |
|
"loss": 0.8703, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.911078717201166e-05, |
|
"loss": 0.9068, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.9067055393586008e-05, |
|
"loss": 0.8725, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.902332361516035e-05, |
|
"loss": 0.7559, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8979591836734696e-05, |
|
"loss": 0.8044, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8935860058309037e-05, |
|
"loss": 1.0526, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8892128279883384e-05, |
|
"loss": 0.8945, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8848396501457728e-05, |
|
"loss": 0.799, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.880466472303207e-05, |
|
"loss": 0.7597, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8760932944606416e-05, |
|
"loss": 0.772, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8717201166180756e-05, |
|
"loss": 0.9276, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8673469387755104e-05, |
|
"loss": 0.7726, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8629737609329444e-05, |
|
"loss": 0.9847, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.858600583090379e-05, |
|
"loss": 0.8455, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8542274052478135e-05, |
|
"loss": 0.7554, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.849854227405248e-05, |
|
"loss": 0.7653, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.8454810495626823e-05, |
|
"loss": 0.8519, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.8411078717201164e-05, |
|
"loss": 0.7033, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.836734693877551e-05, |
|
"loss": 0.8955, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.8323615160349855e-05, |
|
"loss": 0.7778, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.82798833819242e-05, |
|
"loss": 0.819, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.8236151603498543e-05, |
|
"loss": 0.8427, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8192419825072887e-05, |
|
"loss": 0.8098, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.814868804664723e-05, |
|
"loss": 0.8787, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8104956268221575e-05, |
|
"loss": 0.7492, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.806122448979592e-05, |
|
"loss": 0.7385, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.8017492711370263e-05, |
|
"loss": 0.7456, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.7973760932944607e-05, |
|
"loss": 0.8629, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.793002915451895e-05, |
|
"loss": 0.9145, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.7886297376093295e-05, |
|
"loss": 0.8599, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.784256559766764e-05, |
|
"loss": 0.8948, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.7798833819241983e-05, |
|
"loss": 0.8635, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7755102040816327e-05, |
|
"loss": 0.7858, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7711370262390674e-05, |
|
"loss": 0.9611, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7667638483965014e-05, |
|
"loss": 0.814, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.762390670553936e-05, |
|
"loss": 0.8337, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7580174927113702e-05, |
|
"loss": 0.7874, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7536443148688046e-05, |
|
"loss": 0.7569, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7492711370262394e-05, |
|
"loss": 0.8028, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7448979591836734e-05, |
|
"loss": 0.7952, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.740524781341108e-05, |
|
"loss": 0.8058, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7361516034985422e-05, |
|
"loss": 0.7663, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.731778425655977e-05, |
|
"loss": 0.8116, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.727405247813411e-05, |
|
"loss": 0.8715, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.7230320699708454e-05, |
|
"loss": 0.9337, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.71865889212828e-05, |
|
"loss": 0.6941, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.7186, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.709912536443149e-05, |
|
"loss": 0.8248, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.705539358600583e-05, |
|
"loss": 0.9292, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7011661807580177e-05, |
|
"loss": 0.8749, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6967930029154518e-05, |
|
"loss": 0.9838, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6924198250728865e-05, |
|
"loss": 0.8464, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.688046647230321e-05, |
|
"loss": 0.8021, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.683673469387755e-05, |
|
"loss": 0.8395, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6793002915451897e-05, |
|
"loss": 0.7987, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6749271137026237e-05, |
|
"loss": 0.8711, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6705539358600585e-05, |
|
"loss": 0.752, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.666180758017493e-05, |
|
"loss": 0.7489, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6618075801749273e-05, |
|
"loss": 0.8933, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6574344023323617e-05, |
|
"loss": 0.8087, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6530612244897957e-05, |
|
"loss": 0.9219, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6486880466472304e-05, |
|
"loss": 0.7434, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6443148688046645e-05, |
|
"loss": 0.6875, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6399416909620992e-05, |
|
"loss": 0.8969, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6355685131195336e-05, |
|
"loss": 0.7929, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.631195335276968e-05, |
|
"loss": 0.783, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6268221574344024e-05, |
|
"loss": 0.98, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6224489795918368e-05, |
|
"loss": 0.8074, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6180758017492712e-05, |
|
"loss": 0.8186, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6137026239067053e-05, |
|
"loss": 0.6993, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.60932944606414e-05, |
|
"loss": 0.8308, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6049562682215744e-05, |
|
"loss": 0.7593, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6005830903790088e-05, |
|
"loss": 0.6809, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5962099125364432e-05, |
|
"loss": 0.7695, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5918367346938776e-05, |
|
"loss": 0.917, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.587463556851312e-05, |
|
"loss": 0.7101, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5830903790087464e-05, |
|
"loss": 0.9573, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5787172011661808e-05, |
|
"loss": 0.8902, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.574344023323615e-05, |
|
"loss": 0.7231, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5699708454810496e-05, |
|
"loss": 0.8754, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.565597667638484e-05, |
|
"loss": 0.9213, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.5612244897959183e-05, |
|
"loss": 0.7397, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.5568513119533527e-05, |
|
"loss": 0.7227, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5524781341107875e-05, |
|
"loss": 0.7672, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5481049562682215e-05, |
|
"loss": 0.7325, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5437317784256563e-05, |
|
"loss": 0.8339, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5393586005830903e-05, |
|
"loss": 0.793, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5349854227405247e-05, |
|
"loss": 0.9217, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.530612244897959e-05, |
|
"loss": 0.9395, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5262390670553935e-05, |
|
"loss": 0.7461, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.521865889212828e-05, |
|
"loss": 0.7148, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5174927113702623e-05, |
|
"loss": 0.9149, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.513119533527697e-05, |
|
"loss": 0.812, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5087463556851312e-05, |
|
"loss": 0.6849, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5043731778425658e-05, |
|
"loss": 0.7184, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.8343, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.4956268221574344e-05, |
|
"loss": 0.6949, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4912536443148688e-05, |
|
"loss": 0.6325, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4868804664723032e-05, |
|
"loss": 0.6235, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4825072886297378e-05, |
|
"loss": 0.5982, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.478134110787172e-05, |
|
"loss": 0.803, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.4737609329446064e-05, |
|
"loss": 0.6774, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.4693877551020408e-05, |
|
"loss": 0.6396, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.4650145772594752e-05, |
|
"loss": 0.7721, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.4606413994169096e-05, |
|
"loss": 0.8477, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.4562682215743442e-05, |
|
"loss": 0.7166, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.4518950437317786e-05, |
|
"loss": 0.6573, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.447521865889213e-05, |
|
"loss": 0.7047, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.4431486880466473e-05, |
|
"loss": 0.613, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.4387755102040816e-05, |
|
"loss": 0.7446, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.434402332361516e-05, |
|
"loss": 0.668, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4300291545189505e-05, |
|
"loss": 0.6964, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.425655976676385e-05, |
|
"loss": 0.7486, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4212827988338193e-05, |
|
"loss": 0.7398, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.4169096209912537e-05, |
|
"loss": 0.5944, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.4125364431486881e-05, |
|
"loss": 0.6959, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.4081632653061225e-05, |
|
"loss": 0.6982, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.4037900874635567e-05, |
|
"loss": 0.6906, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.3994169096209913e-05, |
|
"loss": 0.6168, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.3950437317784257e-05, |
|
"loss": 0.8569, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.39067055393586e-05, |
|
"loss": 0.696, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.3862973760932945e-05, |
|
"loss": 0.7874, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.3819241982507289e-05, |
|
"loss": 0.6416, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.3775510204081633e-05, |
|
"loss": 0.7262, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.3731778425655978e-05, |
|
"loss": 0.7177, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.3688046647230322e-05, |
|
"loss": 0.7477, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.3644314868804664e-05, |
|
"loss": 0.7449, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.3600583090379008e-05, |
|
"loss": 0.6565, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.3556851311953352e-05, |
|
"loss": 0.7586, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.3513119533527696e-05, |
|
"loss": 0.519, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.3469387755102042e-05, |
|
"loss": 0.711, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.3425655976676386e-05, |
|
"loss": 0.7435, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.338192419825073e-05, |
|
"loss": 0.822, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.3338192419825074e-05, |
|
"loss": 0.7531, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.3294460641399418e-05, |
|
"loss": 0.8351, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.325072886297376e-05, |
|
"loss": 0.6045, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.3206997084548104e-05, |
|
"loss": 0.8374, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.316326530612245e-05, |
|
"loss": 0.7024, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.3119533527696794e-05, |
|
"loss": 0.6995, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.3075801749271137e-05, |
|
"loss": 0.7083, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.3032069970845481e-05, |
|
"loss": 0.6404, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2988338192419825e-05, |
|
"loss": 0.8153, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.294460641399417e-05, |
|
"loss": 0.6466, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2900874635568515e-05, |
|
"loss": 0.6655, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2857142857142857e-05, |
|
"loss": 0.4714, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2813411078717201e-05, |
|
"loss": 0.7221, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2769679300291545e-05, |
|
"loss": 0.6697, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2725947521865889e-05, |
|
"loss": 0.8205, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2682215743440233e-05, |
|
"loss": 0.6631, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2638483965014577e-05, |
|
"loss": 0.6889, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2594752186588923e-05, |
|
"loss": 0.7284, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2551020408163267e-05, |
|
"loss": 0.7834, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.250728862973761e-05, |
|
"loss": 0.6491, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2463556851311953e-05, |
|
"loss": 0.796, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2419825072886297e-05, |
|
"loss": 0.7083, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.237609329446064e-05, |
|
"loss": 0.6393, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2332361516034986e-05, |
|
"loss": 0.7, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.228862973760933e-05, |
|
"loss": 0.7005, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2244897959183674e-05, |
|
"loss": 0.635, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2201166180758018e-05, |
|
"loss": 0.7491, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2157434402332362e-05, |
|
"loss": 0.6133, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2113702623906704e-05, |
|
"loss": 0.6128, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.206997084548105e-05, |
|
"loss": 0.6358, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.2026239067055394e-05, |
|
"loss": 0.8197, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.1982507288629738e-05, |
|
"loss": 0.7669, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.1938775510204082e-05, |
|
"loss": 0.6701, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1895043731778426e-05, |
|
"loss": 0.7437, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.185131195335277e-05, |
|
"loss": 0.8168, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1807580174927114e-05, |
|
"loss": 0.6048, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.176384839650146e-05, |
|
"loss": 0.681, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1720116618075802e-05, |
|
"loss": 0.779, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1676384839650146e-05, |
|
"loss": 0.7463, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.163265306122449e-05, |
|
"loss": 0.7479, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1588921282798833e-05, |
|
"loss": 0.7277, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1545189504373177e-05, |
|
"loss": 0.7327, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1501457725947523e-05, |
|
"loss": 0.6682, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1457725947521867e-05, |
|
"loss": 0.6823, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1413994169096211e-05, |
|
"loss": 0.6142, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1370262390670555e-05, |
|
"loss": 0.7939, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1326530612244897e-05, |
|
"loss": 0.7539, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1282798833819241e-05, |
|
"loss": 0.6073, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1239067055393587e-05, |
|
"loss": 0.7128, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.119533527696793e-05, |
|
"loss": 0.6475, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1151603498542275e-05, |
|
"loss": 0.7107, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1107871720116619e-05, |
|
"loss": 0.6458, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1064139941690962e-05, |
|
"loss": 0.8056, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1020408163265306e-05, |
|
"loss": 0.7321, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.097667638483965e-05, |
|
"loss": 0.7666, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.0932944606413994e-05, |
|
"loss": 0.7894, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.0889212827988338e-05, |
|
"loss": 0.581, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.0845481049562682e-05, |
|
"loss": 0.6994, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.0801749271137026e-05, |
|
"loss": 0.7405, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.075801749271137e-05, |
|
"loss": 0.7731, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.8333, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.067055393586006e-05, |
|
"loss": 0.8215, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.0626822157434404e-05, |
|
"loss": 0.7639, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.0583090379008748e-05, |
|
"loss": 0.6595, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.053935860058309e-05, |
|
"loss": 0.7345, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.0495626822157434e-05, |
|
"loss": 0.6349, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.0451895043731778e-05, |
|
"loss": 0.7739, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.0408163265306123e-05, |
|
"loss": 0.7256, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.0364431486880467e-05, |
|
"loss": 0.6909, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.0320699708454811e-05, |
|
"loss": 0.663, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.0276967930029155e-05, |
|
"loss": 0.6206, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.02332361516035e-05, |
|
"loss": 0.6993, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.0189504373177841e-05, |
|
"loss": 0.7027, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.0145772594752185e-05, |
|
"loss": 0.7586, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.0102040816326531e-05, |
|
"loss": 0.821, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.0058309037900875e-05, |
|
"loss": 0.7111, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.0014577259475219e-05, |
|
"loss": 0.6358, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.970845481049563e-06, |
|
"loss": 0.635, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.927113702623907e-06, |
|
"loss": 0.6775, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.88338192419825e-06, |
|
"loss": 0.6923, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.839650145772596e-06, |
|
"loss": 0.7741, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.795918367346939e-06, |
|
"loss": 0.6225, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.752186588921283e-06, |
|
"loss": 0.733, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.708454810495627e-06, |
|
"loss": 0.8117, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.66472303206997e-06, |
|
"loss": 0.7334, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.620991253644314e-06, |
|
"loss": 0.6461, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.57725947521866e-06, |
|
"loss": 0.7507, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.533527696793004e-06, |
|
"loss": 0.6619, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.489795918367348e-06, |
|
"loss": 0.6141, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.446064139941692e-06, |
|
"loss": 0.6757, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.402332361516034e-06, |
|
"loss": 0.6319, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.358600583090378e-06, |
|
"loss": 0.651, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.314868804664722e-06, |
|
"loss": 0.6266, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.271137026239068e-06, |
|
"loss": 0.5657, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.227405247813412e-06, |
|
"loss": 0.6838, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.183673469387756e-06, |
|
"loss": 0.8081, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.1399416909621e-06, |
|
"loss": 0.5915, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.096209912536444e-06, |
|
"loss": 0.6921, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.052478134110787e-06, |
|
"loss": 0.6148, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.008746355685131e-06, |
|
"loss": 0.6602, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.965014577259475e-06, |
|
"loss": 0.5945, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.92128279883382e-06, |
|
"loss": 0.6965, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.877551020408163e-06, |
|
"loss": 0.7285, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.833819241982507e-06, |
|
"loss": 0.8048, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.790087463556851e-06, |
|
"loss": 0.6653, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.746355685131197e-06, |
|
"loss": 0.6806, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.70262390670554e-06, |
|
"loss": 0.7117, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.658892128279885e-06, |
|
"loss": 0.6386, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.615160349854227e-06, |
|
"loss": 0.6763, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.7101, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.527696793002915e-06, |
|
"loss": 0.6231, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.483965014577259e-06, |
|
"loss": 0.6945, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.440233236151604e-06, |
|
"loss": 0.6969, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.396501457725948e-06, |
|
"loss": 0.6003, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.352769679300292e-06, |
|
"loss": 0.7275, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.309037900874636e-06, |
|
"loss": 0.659, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.265306122448979e-06, |
|
"loss": 0.6436, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.221574344023323e-06, |
|
"loss": 0.7147, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.177842565597668e-06, |
|
"loss": 0.6923, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.134110787172012e-06, |
|
"loss": 0.6701, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.090379008746356e-06, |
|
"loss": 0.7177, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.0466472303207e-06, |
|
"loss": 0.6963, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.002915451895044e-06, |
|
"loss": 0.7587, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.5824, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7.915451895043732e-06, |
|
"loss": 0.7099, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.871720116618076e-06, |
|
"loss": 0.6534, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.82798833819242e-06, |
|
"loss": 0.6332, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.784256559766764e-06, |
|
"loss": 0.655, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.740524781341108e-06, |
|
"loss": 0.6822, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.696793002915452e-06, |
|
"loss": 0.5835, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.653061224489796e-06, |
|
"loss": 0.7233, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.60932944606414e-06, |
|
"loss": 0.6536, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.565597667638485e-06, |
|
"loss": 0.6018, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.521865889212829e-06, |
|
"loss": 0.6019, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.478134110787172e-06, |
|
"loss": 0.7141, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.434402332361516e-06, |
|
"loss": 0.7671, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 7.39067055393586e-06, |
|
"loss": 0.6633, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 7.346938775510204e-06, |
|
"loss": 0.5977, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 7.303206997084548e-06, |
|
"loss": 0.7577, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.259475218658893e-06, |
|
"loss": 0.7126, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.215743440233237e-06, |
|
"loss": 0.6572, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.17201166180758e-06, |
|
"loss": 0.6463, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.128279883381925e-06, |
|
"loss": 0.565, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.0845481049562685e-06, |
|
"loss": 0.6441, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.0408163265306125e-06, |
|
"loss": 0.8821, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.9970845481049564e-06, |
|
"loss": 0.6033, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.9533527696793e-06, |
|
"loss": 0.6548, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.909620991253644e-06, |
|
"loss": 0.6689, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.865889212827989e-06, |
|
"loss": 0.6853, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.822157434402332e-06, |
|
"loss": 0.5613, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.778425655976676e-06, |
|
"loss": 0.7409, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.734693877551021e-06, |
|
"loss": 0.644, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.690962099125365e-06, |
|
"loss": 0.5878, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 6.647230320699709e-06, |
|
"loss": 0.6137, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 6.603498542274052e-06, |
|
"loss": 0.7217, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 6.559766763848397e-06, |
|
"loss": 0.7005, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.516034985422741e-06, |
|
"loss": 0.7718, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.472303206997085e-06, |
|
"loss": 0.6834, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.428571428571429e-06, |
|
"loss": 0.6291, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.3848396501457726e-06, |
|
"loss": 0.6906, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.3411078717201165e-06, |
|
"loss": 0.7809, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.297376093294461e-06, |
|
"loss": 0.661, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.253644314868805e-06, |
|
"loss": 0.6932, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.209912536443148e-06, |
|
"loss": 0.7602, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.166180758017493e-06, |
|
"loss": 0.7317, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.122448979591837e-06, |
|
"loss": 0.7391, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.078717201166181e-06, |
|
"loss": 0.6777, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 6.034985422740525e-06, |
|
"loss": 0.7403, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.991253644314869e-06, |
|
"loss": 0.6889, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.947521865889213e-06, |
|
"loss": 0.655, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.903790087463557e-06, |
|
"loss": 0.5358, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.860058309037901e-06, |
|
"loss": 0.6668, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.816326530612245e-06, |
|
"loss": 0.5347, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.772594752186589e-06, |
|
"loss": 0.5213, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.7288629737609335e-06, |
|
"loss": 0.7983, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.6851311953352774e-06, |
|
"loss": 0.7329, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.6413994169096205e-06, |
|
"loss": 0.6845, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.597667638483965e-06, |
|
"loss": 0.54, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.553935860058309e-06, |
|
"loss": 0.5828, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.510204081632653e-06, |
|
"loss": 0.7079, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5.466472303206997e-06, |
|
"loss": 0.6471, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5.422740524781341e-06, |
|
"loss": 0.7349, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5.379008746355685e-06, |
|
"loss": 0.6399, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5.33527696793003e-06, |
|
"loss": 0.7541, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5.291545189504374e-06, |
|
"loss": 0.6564, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5.247813411078717e-06, |
|
"loss": 0.5897, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5.204081632653062e-06, |
|
"loss": 0.7713, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.160349854227406e-06, |
|
"loss": 0.5714, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.11661807580175e-06, |
|
"loss": 0.666, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.072886297376093e-06, |
|
"loss": 0.6647, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.0291545189504375e-06, |
|
"loss": 0.7072, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.9854227405247814e-06, |
|
"loss": 0.7326, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.941690962099125e-06, |
|
"loss": 0.6275, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.897959183673469e-06, |
|
"loss": 0.6175, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.854227405247813e-06, |
|
"loss": 0.6715, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.810495626822157e-06, |
|
"loss": 0.69, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.766763848396502e-06, |
|
"loss": 0.6194, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.723032069970846e-06, |
|
"loss": 0.7825, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.679300291545189e-06, |
|
"loss": 0.5966, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.635568513119534e-06, |
|
"loss": 0.593, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.591836734693878e-06, |
|
"loss": 0.8133, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.548104956268222e-06, |
|
"loss": 0.665, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.504373177842566e-06, |
|
"loss": 0.6998, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.46064139941691e-06, |
|
"loss": 0.6609, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.416909620991254e-06, |
|
"loss": 0.5941, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.373177842565598e-06, |
|
"loss": 0.6983, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.329446064139942e-06, |
|
"loss": 0.5953, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.7267, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.241982507288629e-06, |
|
"loss": 0.791, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.198250728862974e-06, |
|
"loss": 0.6921, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.154518950437318e-06, |
|
"loss": 0.8183, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.110787172011661e-06, |
|
"loss": 0.6162, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.067055393586006e-06, |
|
"loss": 0.7695, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.02332361516035e-06, |
|
"loss": 0.6925, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.979591836734694e-06, |
|
"loss": 0.6336, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.935860058309038e-06, |
|
"loss": 0.7375, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.892128279883382e-06, |
|
"loss": 0.8469, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.848396501457726e-06, |
|
"loss": 0.635, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.80466472303207e-06, |
|
"loss": 0.9703, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.7609329446064145e-06, |
|
"loss": 0.653, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.717201166180758e-06, |
|
"loss": 0.6354, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.673469387755102e-06, |
|
"loss": 0.6025, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.6297376093294464e-06, |
|
"loss": 0.6416, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.58600583090379e-06, |
|
"loss": 0.6034, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.5422740524781343e-06, |
|
"loss": 0.6376, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.4985422740524782e-06, |
|
"loss": 0.6776, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.454810495626822e-06, |
|
"loss": 0.6841, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.411078717201166e-06, |
|
"loss": 0.641, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.3673469387755105e-06, |
|
"loss": 0.76, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.3236151603498544e-06, |
|
"loss": 0.7558, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.2798833819241984e-06, |
|
"loss": 0.6344, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.2361516034985423e-06, |
|
"loss": 0.6477, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.1924198250728863e-06, |
|
"loss": 0.5704, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.1486880466472307e-06, |
|
"loss": 0.7345, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.104956268221574e-06, |
|
"loss": 0.6538, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.0612244897959185e-06, |
|
"loss": 0.6457, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.0174927113702625e-06, |
|
"loss": 0.6632, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.9737609329446064e-06, |
|
"loss": 0.5877, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.9300291545189504e-06, |
|
"loss": 0.7263, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.8862973760932943e-06, |
|
"loss": 0.7151, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.8425655976676387e-06, |
|
"loss": 0.7417, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.7988338192419827e-06, |
|
"loss": 0.7464, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.7551020408163266e-06, |
|
"loss": 0.6743, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.7113702623906706e-06, |
|
"loss": 0.6379, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.667638483965015e-06, |
|
"loss": 0.6686, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.6239067055393585e-06, |
|
"loss": 0.6287, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.580174927113703e-06, |
|
"loss": 0.8058, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.5364431486880463e-06, |
|
"loss": 0.6915, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.4927113702623907e-06, |
|
"loss": 0.6038, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.4489795918367347e-06, |
|
"loss": 0.6473, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.4052478134110786e-06, |
|
"loss": 0.6237, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.361516034985423e-06, |
|
"loss": 0.8068, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.317784256559767e-06, |
|
"loss": 0.6403, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.274052478134111e-06, |
|
"loss": 0.6132, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.230320699708455e-06, |
|
"loss": 0.8262, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.186588921282799e-06, |
|
"loss": 0.7788, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.6717, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.099125364431487e-06, |
|
"loss": 0.7498, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.0553935860058306e-06, |
|
"loss": 0.6071, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.011661807580175e-06, |
|
"loss": 0.6848, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.967930029154519e-06, |
|
"loss": 0.784, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.924198250728863e-06, |
|
"loss": 0.6056, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8804664723032073e-06, |
|
"loss": 0.7672, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.836734693877551e-06, |
|
"loss": 0.7579, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.793002915451895e-06, |
|
"loss": 0.6712, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.7492711370262391e-06, |
|
"loss": 0.758, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.705539358600583e-06, |
|
"loss": 0.6262, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.6618075801749272e-06, |
|
"loss": 0.5719, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.6180758017492712e-06, |
|
"loss": 0.6228, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.5743440233236153e-06, |
|
"loss": 0.5954, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.5306122448979593e-06, |
|
"loss": 0.6157, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.4868804664723032e-06, |
|
"loss": 0.6907, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.4431486880466472e-06, |
|
"loss": 0.5488, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.3994169096209913e-06, |
|
"loss": 0.5904, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.3556851311953353e-06, |
|
"loss": 0.608, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.3119533527696792e-06, |
|
"loss": 0.8896, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.2682215743440232e-06, |
|
"loss": 0.6197, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.2244897959183673e-06, |
|
"loss": 0.6063, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.1807580174927115e-06, |
|
"loss": 0.7669, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.1370262390670554e-06, |
|
"loss": 0.8527, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.0932944606413996e-06, |
|
"loss": 0.6512, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.0495626822157436e-06, |
|
"loss": 0.6591, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.0058309037900875e-06, |
|
"loss": 0.6701, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.620991253644314e-07, |
|
"loss": 0.598, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.183673469387755e-07, |
|
"loss": 0.7497, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.746355685131196e-07, |
|
"loss": 0.6544, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.309037900874636e-07, |
|
"loss": 0.7283, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.871720116618077e-07, |
|
"loss": 0.7193, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.434402332361516e-07, |
|
"loss": 0.6521, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.997084548104957e-07, |
|
"loss": 0.5521, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.559766763848396e-07, |
|
"loss": 0.581, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.122448979591837e-07, |
|
"loss": 0.6203, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.685131195335277e-07, |
|
"loss": 0.5413, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.247813411078718e-07, |
|
"loss": 0.7687, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.810495626822157e-07, |
|
"loss": 0.6807, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.373177842565598e-07, |
|
"loss": 0.6888, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.9358600583090383e-07, |
|
"loss": 0.59, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4985422740524783e-07, |
|
"loss": 0.617, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.0612244897959183e-07, |
|
"loss": 0.6244, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.623906705539359e-07, |
|
"loss": 0.6787, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.186588921282799e-07, |
|
"loss": 0.7535, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7492711370262392e-07, |
|
"loss": 0.649, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.3119533527696794e-07, |
|
"loss": 0.6996, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.746355685131196e-08, |
|
"loss": 0.7669, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.373177842565598e-08, |
|
"loss": 0.7109, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.7227, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 686, |
|
"total_flos": 1.932278221504512e+17, |
|
"train_loss": 0.8650754356088861, |
|
"train_runtime": 3796.8514, |
|
"train_samples_per_second": 46.143, |
|
"train_steps_per_second": 0.181 |
|
} |
|
], |
|
"max_steps": 686, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.932278221504512e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|