|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.994413407821229, |
|
"global_step": 804, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.060606060606061e-07, |
|
"loss": 1.0973, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2121212121212122e-06, |
|
"loss": 1.048, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 0.9709, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4242424242424244e-06, |
|
"loss": 0.9895, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 0.9636, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 0.9684, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.242424242424243e-06, |
|
"loss": 0.9015, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.848484848484849e-06, |
|
"loss": 0.9344, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 0.9279, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 0.9241, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.877, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 0.9253, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.87878787878788e-06, |
|
"loss": 0.8935, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.484848484848486e-06, |
|
"loss": 0.9063, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.9067, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.696969696969698e-06, |
|
"loss": 0.865, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0303030303030304e-05, |
|
"loss": 0.8991, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.8919, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1515151515151517e-05, |
|
"loss": 0.9375, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.8823, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 0.9011, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.8969, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.3939393939393942e-05, |
|
"loss": 0.9377, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.865, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.8763, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.575757575757576e-05, |
|
"loss": 0.8526, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.8437, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6969696969696972e-05, |
|
"loss": 0.8529, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7575757575757576e-05, |
|
"loss": 0.8917, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.8718, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8787878787878792e-05, |
|
"loss": 0.8952, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9393939393939395e-05, |
|
"loss": 0.8822, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8738, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9999916984269715e-05, |
|
"loss": 0.8835, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9999667938457175e-05, |
|
"loss": 0.8765, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9999252866697328e-05, |
|
"loss": 0.9078, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9998671775881668e-05, |
|
"loss": 0.8881, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9997924675658134e-05, |
|
"loss": 0.8477, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9997011578430938e-05, |
|
"loss": 0.8506, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.999593249936037e-05, |
|
"loss": 0.8646, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9994687456362532e-05, |
|
"loss": 0.8204, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9993276470109054e-05, |
|
"loss": 0.8339, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9991699564026755e-05, |
|
"loss": 0.8299, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9989956764297232e-05, |
|
"loss": 0.8368, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9988048099856443e-05, |
|
"loss": 0.8283, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.998597360239422e-05, |
|
"loss": 0.8118, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9983733306353755e-05, |
|
"loss": 0.853, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9981327248931008e-05, |
|
"loss": 0.8937, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9978755470074093e-05, |
|
"loss": 0.8151, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9976018012482642e-05, |
|
"loss": 0.86, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9973114921607055e-05, |
|
"loss": 0.8584, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9970046245647777e-05, |
|
"loss": 0.8532, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.996681203555449e-05, |
|
"loss": 0.8282, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.996341234502524e-05, |
|
"loss": 0.8483, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.99598472305056e-05, |
|
"loss": 0.8513, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.995611675118768e-05, |
|
"loss": 0.8674, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9952220969009175e-05, |
|
"loss": 0.8434, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9948159948652324e-05, |
|
"loss": 0.8474, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9943933757542847e-05, |
|
"loss": 0.8793, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9939542465848808e-05, |
|
"loss": 0.8488, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9934986146479457e-05, |
|
"loss": 0.8291, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9930264875084045e-05, |
|
"loss": 0.857, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9925378730050518e-05, |
|
"loss": 0.8573, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9920327792504263e-05, |
|
"loss": 0.8404, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.991511214630673e-05, |
|
"loss": 0.8245, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9909731878054057e-05, |
|
"loss": 0.8506, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.990418707707562e-05, |
|
"loss": 0.8564, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9898477835432566e-05, |
|
"loss": 0.8516, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.989260424791626e-05, |
|
"loss": 0.8626, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.988656641204674e-05, |
|
"loss": 0.8331, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9880364428071073e-05, |
|
"loss": 0.8271, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9873998398961708e-05, |
|
"loss": 0.8371, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9867468430414752e-05, |
|
"loss": 0.8212, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9860774630848228e-05, |
|
"loss": 0.8712, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.985391711140027e-05, |
|
"loss": 0.8365, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.984689598592727e-05, |
|
"loss": 0.8543, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9839711371002006e-05, |
|
"loss": 0.8369, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9832363385911682e-05, |
|
"loss": 0.8333, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.982485215265598e-05, |
|
"loss": 0.8556, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.981717779594499e-05, |
|
"loss": 0.8765, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.980934044319718e-05, |
|
"loss": 0.8738, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9801340224537266e-05, |
|
"loss": 0.8279, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9793177272794044e-05, |
|
"loss": 0.8081, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.97848517234982e-05, |
|
"loss": 0.818, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9776363714880037e-05, |
|
"loss": 0.7964, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9767713387867204e-05, |
|
"loss": 0.8212, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9758900886082343e-05, |
|
"loss": 0.8306, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9749926355840716e-05, |
|
"loss": 0.8317, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9740789946147748e-05, |
|
"loss": 0.8105, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9731491808696593e-05, |
|
"loss": 0.8434, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.972203209786558e-05, |
|
"loss": 0.8484, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9712410970715674e-05, |
|
"loss": 0.8463, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9702628586987846e-05, |
|
"loss": 0.8552, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.969268510910045e-05, |
|
"loss": 0.8104, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9682580702146497e-05, |
|
"loss": 0.865, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9672315533890934e-05, |
|
"loss": 0.8318, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.966188977476785e-05, |
|
"loss": 0.8648, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.965130359787764e-05, |
|
"loss": 0.8442, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9640557178984152e-05, |
|
"loss": 0.8372, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.962965069651175e-05, |
|
"loss": 0.8479, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.9618584331542345e-05, |
|
"loss": 0.8283, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.960735826781242e-05, |
|
"loss": 0.8316, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.959597269170995e-05, |
|
"loss": 0.8203, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9584427792271317e-05, |
|
"loss": 0.8137, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9572723761178168e-05, |
|
"loss": 0.8307, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9560860792754248e-05, |
|
"loss": 0.7982, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.954883908396215e-05, |
|
"loss": 0.8406, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9536658834400058e-05, |
|
"loss": 0.8073, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.952432024629844e-05, |
|
"loss": 0.8122, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9511823524516677e-05, |
|
"loss": 0.826, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9499168876539666e-05, |
|
"loss": 0.7872, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.948635651247437e-05, |
|
"loss": 0.8111, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9473386645046344e-05, |
|
"loss": 0.8503, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9460259489596192e-05, |
|
"loss": 0.8338, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9446975264075993e-05, |
|
"loss": 0.8166, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.943353418904568e-05, |
|
"loss": 0.8383, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9419936487669396e-05, |
|
"loss": 0.8706, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.940618238571175e-05, |
|
"loss": 0.843, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.939227211153411e-05, |
|
"loss": 0.8368, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9378205896090796e-05, |
|
"loss": 0.8077, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.936398397292523e-05, |
|
"loss": 0.7824, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.934960657816608e-05, |
|
"loss": 0.8343, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9335073950523335e-05, |
|
"loss": 0.7876, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9320386331284334e-05, |
|
"loss": 0.7896, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9305543964309756e-05, |
|
"loss": 0.8056, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.92905470960296e-05, |
|
"loss": 0.8155, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9275395975439054e-05, |
|
"loss": 0.8526, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9260090854094388e-05, |
|
"loss": 0.8044, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9244631986108768e-05, |
|
"loss": 0.7958, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.922901962814803e-05, |
|
"loss": 0.8284, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.921325403942644e-05, |
|
"loss": 0.8291, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.9197335481702374e-05, |
|
"loss": 0.8035, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.9181264219273957e-05, |
|
"loss": 0.7947, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.916504051897472e-05, |
|
"loss": 0.8085, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.9148664650169128e-05, |
|
"loss": 0.8082, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9132136884748114e-05, |
|
"loss": 0.801, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9115457497124588e-05, |
|
"loss": 0.781, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9098626764228854e-05, |
|
"loss": 0.7741, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9081644965504035e-05, |
|
"loss": 0.8164, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.906451238290141e-05, |
|
"loss": 0.8238, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.904722930087575e-05, |
|
"loss": 0.8289, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.9029796006380595e-05, |
|
"loss": 0.8435, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.9012212788863475e-05, |
|
"loss": 0.837, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8994479940261122e-05, |
|
"loss": 0.8466, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8976597754994614e-05, |
|
"loss": 0.8067, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8958566529964475e-05, |
|
"loss": 0.845, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.8940386564545773e-05, |
|
"loss": 0.8186, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.8922058160583133e-05, |
|
"loss": 0.8188, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.890358162238572e-05, |
|
"loss": 0.8225, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.8884957256722197e-05, |
|
"loss": 0.8152, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.886618537281562e-05, |
|
"loss": 0.8285, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8847266282338333e-05, |
|
"loss": 0.7959, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8828200299406747e-05, |
|
"loss": 0.7822, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8808987740576166e-05, |
|
"loss": 0.8137, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.8789628924835512e-05, |
|
"loss": 0.825, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.8770124173602026e-05, |
|
"loss": 0.823, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.8750473810715944e-05, |
|
"loss": 0.8252, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.8730678162435112e-05, |
|
"loss": 0.82, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.871073755742957e-05, |
|
"loss": 0.7967, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8690652326776088e-05, |
|
"loss": 0.8253, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8670422803952696e-05, |
|
"loss": 0.7896, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8650049324833107e-05, |
|
"loss": 0.7918, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8629532227681175e-05, |
|
"loss": 0.8054, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8608871853145257e-05, |
|
"loss": 0.788, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8588068544252572e-05, |
|
"loss": 0.7879, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.85671226464035e-05, |
|
"loss": 0.7775, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8546034507365832e-05, |
|
"loss": 0.7861, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.852480447726903e-05, |
|
"loss": 0.8016, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.850343290859838e-05, |
|
"loss": 0.817, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8481920156189157e-05, |
|
"loss": 0.7894, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8460266577220733e-05, |
|
"loss": 0.8207, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8438472531210645e-05, |
|
"loss": 0.7807, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8416538380008616e-05, |
|
"loss": 0.8199, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.839446448779057e-05, |
|
"loss": 0.8094, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8372251221052556e-05, |
|
"loss": 0.8002, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8349898948604686e-05, |
|
"loss": 0.8008, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8327408041565013e-05, |
|
"loss": 0.8159, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8304778873353345e-05, |
|
"loss": 0.7965, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8282011819685064e-05, |
|
"loss": 0.7865, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8259107258564897e-05, |
|
"loss": 0.7781, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.823606557028061e-05, |
|
"loss": 0.7835, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.821288713739672e-05, |
|
"loss": 0.8132, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.818957234474813e-05, |
|
"loss": 0.8205, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.816612157943376e-05, |
|
"loss": 0.7924, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8142535230810078e-05, |
|
"loss": 0.836, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8118813690484686e-05, |
|
"loss": 0.8127, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.8094957352309773e-05, |
|
"loss": 0.7757, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.8070966612375612e-05, |
|
"loss": 0.8037, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.8046841869003962e-05, |
|
"loss": 0.7764, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8022583522741456e-05, |
|
"loss": 0.8267, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.799819197635297e-05, |
|
"loss": 0.8251, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.79736676348149e-05, |
|
"loss": 0.8104, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.794901090530848e-05, |
|
"loss": 0.7853, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7924222197212984e-05, |
|
"loss": 0.7996, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7899301922098958e-05, |
|
"loss": 0.8062, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7874250493721362e-05, |
|
"loss": 0.8023, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7849068328012734e-05, |
|
"loss": 0.8263, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7823755843076233e-05, |
|
"loss": 0.7949, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7798313459178754e-05, |
|
"loss": 0.7921, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7772741598743913e-05, |
|
"loss": 0.7891, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.774704068634504e-05, |
|
"loss": 0.7698, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.772121114869814e-05, |
|
"loss": 0.7697, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7695253414654796e-05, |
|
"loss": 0.7906, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.766916791519506e-05, |
|
"loss": 0.7894, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.7642955083420288e-05, |
|
"loss": 0.8071, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.7616615354545956e-05, |
|
"loss": 0.8193, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.759014916589443e-05, |
|
"loss": 0.7644, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.7563556956887705e-05, |
|
"loss": 0.7712, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.7536839169040107e-05, |
|
"loss": 0.7797, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.7509996245950977e-05, |
|
"loss": 0.799, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.7483028633297287e-05, |
|
"loss": 0.8082, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.745593677882624e-05, |
|
"loss": 0.7822, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.7428721132347863e-05, |
|
"loss": 0.7855, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.740138214572751e-05, |
|
"loss": 0.8207, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.7373920272878357e-05, |
|
"loss": 0.789, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.73463359697539e-05, |
|
"loss": 0.8216, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.731862969434035e-05, |
|
"loss": 0.7807, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.729080190664904e-05, |
|
"loss": 0.7883, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7262853068708807e-05, |
|
"loss": 0.7688, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7234783644558276e-05, |
|
"loss": 0.7807, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.72065941002382e-05, |
|
"loss": 0.7771, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7178284903783698e-05, |
|
"loss": 0.7987, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.71498565252165e-05, |
|
"loss": 0.7784, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7121309436537125e-05, |
|
"loss": 0.7796, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7092644111717052e-05, |
|
"loss": 0.7717, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7063861026690863e-05, |
|
"loss": 0.7564, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.7034960659348314e-05, |
|
"loss": 0.8182, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.7005943489526432e-05, |
|
"loss": 0.7806, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.697680999900152e-05, |
|
"loss": 0.7406, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6947560671481182e-05, |
|
"loss": 0.8077, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6918195992596274e-05, |
|
"loss": 0.7813, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6888716449892843e-05, |
|
"loss": 0.7902, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.685912253282405e-05, |
|
"loss": 0.8099, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6829414732742013e-05, |
|
"loss": 0.7749, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6799593542889686e-05, |
|
"loss": 0.7565, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6769659458392636e-05, |
|
"loss": 0.7424, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6739612976250836e-05, |
|
"loss": 0.769, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.670945459533042e-05, |
|
"loss": 0.8043, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6679184816355393e-05, |
|
"loss": 0.7834, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6648804141899316e-05, |
|
"loss": 0.8174, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6618313076376966e-05, |
|
"loss": 0.7654, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.658771212603595e-05, |
|
"loss": 0.7987, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6557001798948324e-05, |
|
"loss": 0.7606, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.652618260500213e-05, |
|
"loss": 0.7392, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6495255055892948e-05, |
|
"loss": 0.7951, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6464219665115392e-05, |
|
"loss": 0.7711, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6433076947954593e-05, |
|
"loss": 0.7499, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6401827421477625e-05, |
|
"loss": 0.7558, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.637047160452494e-05, |
|
"loss": 0.7708, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6339010017701757e-05, |
|
"loss": 0.8055, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6307443183369382e-05, |
|
"loss": 0.756, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6275771625636592e-05, |
|
"loss": 0.7945, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6243995870350875e-05, |
|
"loss": 0.7834, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.621211644508974e-05, |
|
"loss": 0.716, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6180133879151943e-05, |
|
"loss": 0.7796, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6148048703548696e-05, |
|
"loss": 0.8073, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.6115861450994855e-05, |
|
"loss": 0.767, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.6083572655900074e-05, |
|
"loss": 0.7638, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.6051182854359935e-05, |
|
"loss": 0.8298, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.601869258414705e-05, |
|
"loss": 0.7502, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5986102384702112e-05, |
|
"loss": 0.775, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5953412797124968e-05, |
|
"loss": 0.7736, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.592062436416561e-05, |
|
"loss": 0.765, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5887737630215188e-05, |
|
"loss": 0.7844, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5854753141296938e-05, |
|
"loss": 0.7701, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5821671445057157e-05, |
|
"loss": 0.7761, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5788493090756074e-05, |
|
"loss": 0.7867, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5755218629258747e-05, |
|
"loss": 0.7642, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.572184861302593e-05, |
|
"loss": 0.7152, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.5688383596104875e-05, |
|
"loss": 0.7047, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.565482413412014e-05, |
|
"loss": 0.7114, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.562117078426438e-05, |
|
"loss": 0.7058, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.558742410528907e-05, |
|
"loss": 0.7027, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.5553584657495257e-05, |
|
"loss": 0.7172, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.5519653002724235e-05, |
|
"loss": 0.7043, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5485629704348223e-05, |
|
"loss": 0.7049, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5451515327261012e-05, |
|
"loss": 0.7158, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5417310437868586e-05, |
|
"loss": 0.7166, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5383015604079723e-05, |
|
"loss": 0.6919, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5348631395296553e-05, |
|
"loss": 0.7179, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.531415838240512e-05, |
|
"loss": 0.6826, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5279597137765894e-05, |
|
"loss": 0.726, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5244948235204265e-05, |
|
"loss": 0.7188, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.5210212250001024e-05, |
|
"loss": 0.7284, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.5175389758882803e-05, |
|
"loss": 0.714, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.5140481340012515e-05, |
|
"loss": 0.7043, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.5105487572979729e-05, |
|
"loss": 0.729, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.5070409038791077e-05, |
|
"loss": 0.743, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.5035246319860578e-05, |
|
"loss": 0.7074, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.7443, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4964670664409136e-05, |
|
"loss": 0.7464, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4929258899666102e-05, |
|
"loss": 0.7055, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.48937652937176e-05, |
|
"loss": 0.7015, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4858190435869155e-05, |
|
"loss": 0.6848, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.482253491677533e-05, |
|
"loss": 0.688, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.4786799328429915e-05, |
|
"loss": 0.6706, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.4750984264156103e-05, |
|
"loss": 0.7108, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.4715090318596637e-05, |
|
"loss": 0.6752, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.4679118087703933e-05, |
|
"loss": 0.7324, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4643068168730203e-05, |
|
"loss": 0.7175, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4606941160217511e-05, |
|
"loss": 0.7064, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4570737661987862e-05, |
|
"loss": 0.7336, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.4534458275133214e-05, |
|
"loss": 0.7196, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.4498103602005537e-05, |
|
"loss": 0.7266, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.446167424620677e-05, |
|
"loss": 0.7367, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4425170812578837e-05, |
|
"loss": 0.7055, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.4388593907193571e-05, |
|
"loss": 0.7063, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.435194413734268e-05, |
|
"loss": 0.6754, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.431522211152764e-05, |
|
"loss": 0.7158, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.4278428439449613e-05, |
|
"loss": 0.6712, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.424156373199931e-05, |
|
"loss": 0.6762, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.420462860124685e-05, |
|
"loss": 0.682, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.416762366043161e-05, |
|
"loss": 0.6596, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.4130549523952022e-05, |
|
"loss": 0.6997, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.4093406807355389e-05, |
|
"loss": 0.6553, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.4056196127327661e-05, |
|
"loss": 0.7142, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.4018918101683191e-05, |
|
"loss": 0.6933, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.3981573349354492e-05, |
|
"loss": 0.7041, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3944162490381931e-05, |
|
"loss": 0.7147, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3906686145903467e-05, |
|
"loss": 0.6945, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3869144938144325e-05, |
|
"loss": 0.7096, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3831539490406658e-05, |
|
"loss": 0.6956, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3793870427059204e-05, |
|
"loss": 0.6735, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3756138373526931e-05, |
|
"loss": 0.7222, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3718343956280624e-05, |
|
"loss": 0.6984, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3680487802826523e-05, |
|
"loss": 0.6959, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3642570541695867e-05, |
|
"loss": 0.708, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3604592802434486e-05, |
|
"loss": 0.7124, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3566555215592323e-05, |
|
"loss": 0.7126, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3528458412712996e-05, |
|
"loss": 0.6948, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.3490303026323289e-05, |
|
"loss": 0.6973, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.3452089689922646e-05, |
|
"loss": 0.6928, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.3413819037972682e-05, |
|
"loss": 0.7048, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.3375491705886614e-05, |
|
"loss": 0.6836, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.3337108330018737e-05, |
|
"loss": 0.7153, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.3298669547653848e-05, |
|
"loss": 0.7103, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.3260175996996667e-05, |
|
"loss": 0.7268, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.322162831716123e-05, |
|
"loss": 0.7019, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.3183027148160304e-05, |
|
"loss": 0.6769, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.3144373130894729e-05, |
|
"loss": 0.6861, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.3105666907142804e-05, |
|
"loss": 0.6948, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.3066909119549615e-05, |
|
"loss": 0.6713, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.3028100411616369e-05, |
|
"loss": 0.7094, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.2989241427689712e-05, |
|
"loss": 0.6863, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.295033281295103e-05, |
|
"loss": 0.7025, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2911375213405738e-05, |
|
"loss": 0.6959, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2872369275872547e-05, |
|
"loss": 0.7007, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.283331564797274e-05, |
|
"loss": 0.7085, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.2794214978119399e-05, |
|
"loss": 0.7446, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.2755067915506664e-05, |
|
"loss": 0.7093, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.271587511009893e-05, |
|
"loss": 0.6597, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.2676637212620069e-05, |
|
"loss": 0.6855, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.263735487454262e-05, |
|
"loss": 0.6607, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.2598028748076987e-05, |
|
"loss": 0.6527, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.255865948616059e-05, |
|
"loss": 0.6636, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2519247742447029e-05, |
|
"loss": 0.717, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2479794171295248e-05, |
|
"loss": 0.6842, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2440299427758646e-05, |
|
"loss": 0.6855, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2400764167574225e-05, |
|
"loss": 0.728, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2361189047151676e-05, |
|
"loss": 0.6984, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.2321574723562511e-05, |
|
"loss": 0.7033, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.2281921854529127e-05, |
|
"loss": 0.6982, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.22422310984139e-05, |
|
"loss": 0.6974, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.2202503114208251e-05, |
|
"loss": 0.7017, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.2162738561521704e-05, |
|
"loss": 0.722, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.2122938100570938e-05, |
|
"loss": 0.7111, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.2083102392168814e-05, |
|
"loss": 0.6866, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.2043232097713424e-05, |
|
"loss": 0.7032, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.2003327879177085e-05, |
|
"loss": 0.6944, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.1963390399095368e-05, |
|
"loss": 0.7073, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.1923420320556085e-05, |
|
"loss": 0.6594, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1883418307188292e-05, |
|
"loss": 0.698, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1843385023151255e-05, |
|
"loss": 0.6736, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1803321133123442e-05, |
|
"loss": 0.6595, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1763227302291464e-05, |
|
"loss": 0.6944, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1723104196339055e-05, |
|
"loss": 0.7015, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1682952481436e-05, |
|
"loss": 0.6616, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.1642772824227091e-05, |
|
"loss": 0.6924, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.160256589182104e-05, |
|
"loss": 0.6467, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1562332351779422e-05, |
|
"loss": 0.6802, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1522072872105576e-05, |
|
"loss": 0.6882, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1481788121233523e-05, |
|
"loss": 0.7198, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1441478768016871e-05, |
|
"loss": 0.7014, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1401145481717695e-05, |
|
"loss": 0.7074, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1360788931995434e-05, |
|
"loss": 0.6883, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1320409788895786e-05, |
|
"loss": 0.7337, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.1280008722839552e-05, |
|
"loss": 0.6999, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1239586404611542e-05, |
|
"loss": 0.688, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1199143505349401e-05, |
|
"loss": 0.6497, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.1158680696532502e-05, |
|
"loss": 0.667, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.1118198649970762e-05, |
|
"loss": 0.6805, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.1077698037793516e-05, |
|
"loss": 0.6681, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.1037179532438345e-05, |
|
"loss": 0.6398, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0996643806639904e-05, |
|
"loss": 0.6877, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0956091533418777e-05, |
|
"loss": 0.6758, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0915523386070277e-05, |
|
"loss": 0.6901, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0874940038153279e-05, |
|
"loss": 0.6693, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0834342163479035e-05, |
|
"loss": 0.6775, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.079373043609999e-05, |
|
"loss": 0.6905, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0753105530298582e-05, |
|
"loss": 0.6888, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0712468120576059e-05, |
|
"loss": 0.6849, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.067181888164127e-05, |
|
"loss": 0.66, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0631158488399463e-05, |
|
"loss": 0.6826, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0590487615941088e-05, |
|
"loss": 0.673, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.054980693953058e-05, |
|
"loss": 0.6772, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0509117134595152e-05, |
|
"loss": 0.6454, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.046841887671358e-05, |
|
"loss": 0.6422, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.042771284160498e-05, |
|
"loss": 0.7042, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.03869997051176e-05, |
|
"loss": 0.6828, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0346280143217593e-05, |
|
"loss": 0.69, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0305554831977788e-05, |
|
"loss": 0.7156, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0264824447566484e-05, |
|
"loss": 0.6925, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.0224089666236195e-05, |
|
"loss": 0.7213, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.0183351164312448e-05, |
|
"loss": 0.6712, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.0142609618182539e-05, |
|
"loss": 0.7007, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.010186570428431e-05, |
|
"loss": 0.6874, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.0061120099094917e-05, |
|
"loss": 0.6735, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.002037347911959e-05, |
|
"loss": 0.6991, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.979626520880413e-06, |
|
"loss": 0.6606, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.938879900905086e-06, |
|
"loss": 0.6891, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.898134295715693e-06, |
|
"loss": 0.6594, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.857390381817463e-06, |
|
"loss": 0.6597, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.816648835687557e-06, |
|
"loss": 0.6806, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.775910333763809e-06, |
|
"loss": 0.6837, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.73517555243352e-06, |
|
"loss": 0.6813, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.694445168022214e-06, |
|
"loss": 0.7019, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.653719856782412e-06, |
|
"loss": 0.6624, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.613000294882406e-06, |
|
"loss": 0.6842, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.572287158395025e-06, |
|
"loss": 0.6762, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.531581123286426e-06, |
|
"loss": 0.6414, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.49088286540485e-06, |
|
"loss": 0.6768, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.45019306046942e-06, |
|
"loss": 0.6537, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.409512384058916e-06, |
|
"loss": 0.6547, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.368841511600539e-06, |
|
"loss": 0.6429, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.328181118358734e-06, |
|
"loss": 0.6631, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.287531879423943e-06, |
|
"loss": 0.6514, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.246894469701423e-06, |
|
"loss": 0.6907, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.206269563900013e-06, |
|
"loss": 0.6611, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.165657836520969e-06, |
|
"loss": 0.6811, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.125059961846725e-06, |
|
"loss": 0.6474, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.084476613929726e-06, |
|
"loss": 0.6717, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.043908466581225e-06, |
|
"loss": 0.6683, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.003356193360097e-06, |
|
"loss": 0.6768, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.96282046756166e-06, |
|
"loss": 0.6776, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.922301962206487e-06, |
|
"loss": 0.6714, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.881801350029242e-06, |
|
"loss": 0.641, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.841319303467502e-06, |
|
"loss": 0.6795, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.8008564946506e-06, |
|
"loss": 0.6439, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.760413595388463e-06, |
|
"loss": 0.6314, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.719991277160453e-06, |
|
"loss": 0.6687, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.67959021110422e-06, |
|
"loss": 0.6709, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.63921106800457e-06, |
|
"loss": 0.6532, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.59885451828231e-06, |
|
"loss": 0.7103, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.558521231983134e-06, |
|
"loss": 0.7086, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.518211878766475e-06, |
|
"loss": 0.6436, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.477927127894424e-06, |
|
"loss": 0.6654, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.43766764822058e-06, |
|
"loss": 0.6417, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.39743410817896e-06, |
|
"loss": 0.6835, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.35722717577291e-06, |
|
"loss": 0.6904, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.317047518564e-06, |
|
"loss": 0.6609, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.276895803660947e-06, |
|
"loss": 0.6907, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.236772697708537e-06, |
|
"loss": 0.6465, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.196678866876561e-06, |
|
"loss": 0.6563, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.156614976848748e-06, |
|
"loss": 0.6752, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.116581692811711e-06, |
|
"loss": 0.6844, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.076579679443919e-06, |
|
"loss": 0.7005, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.036609600904636e-06, |
|
"loss": 0.6601, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.996672120822918e-06, |
|
"loss": 0.6684, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.956767902286578e-06, |
|
"loss": 0.6402, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.916897607831189e-06, |
|
"loss": 0.6438, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.877061899429067e-06, |
|
"loss": 0.6256, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.8372614384783e-06, |
|
"loss": 0.644, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.797496885791754e-06, |
|
"loss": 0.6987, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.757768901586106e-06, |
|
"loss": 0.6808, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.718078145470878e-06, |
|
"loss": 0.6762, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.678425276437494e-06, |
|
"loss": 0.6372, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.638810952848328e-06, |
|
"loss": 0.6541, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.599235832425781e-06, |
|
"loss": 0.6541, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.559700572241354e-06, |
|
"loss": 0.6985, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.520205828704753e-06, |
|
"loss": 0.6441, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.480752257552972e-06, |
|
"loss": 0.6647, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.441340513839412e-06, |
|
"loss": 0.6855, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.401971251923015e-06, |
|
"loss": 0.6828, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.362645125457381e-06, |
|
"loss": 0.6739, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.323362787379935e-06, |
|
"loss": 0.6812, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.284124889901074e-06, |
|
"loss": 0.6458, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.244932084493338e-06, |
|
"loss": 0.6302, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.205785021880603e-06, |
|
"loss": 0.6504, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.166684352027265e-06, |
|
"loss": 0.6717, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.127630724127457e-06, |
|
"loss": 0.6382, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.088624786594266e-06, |
|
"loss": 0.6781, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.049667187048974e-06, |
|
"loss": 0.6379, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.01075857231029e-06, |
|
"loss": 0.6438, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.971899588383635e-06, |
|
"loss": 0.6584, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.9330908804503874e-06, |
|
"loss": 0.6621, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.894333092857199e-06, |
|
"loss": 0.6591, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.855626869105273e-06, |
|
"loss": 0.6366, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.8169728518397e-06, |
|
"loss": 0.6509, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.778371682838775e-06, |
|
"loss": 0.6809, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.739824003003339e-06, |
|
"loss": 0.6636, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.701330452346156e-06, |
|
"loss": 0.6704, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.662891669981266e-06, |
|
"loss": 0.6648, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.624508294113388e-06, |
|
"loss": 0.6388, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.586180962027322e-06, |
|
"loss": 0.6193, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.547910310077354e-06, |
|
"loss": 0.6301, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.509696973676714e-06, |
|
"loss": 0.6553, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.471541587287003e-06, |
|
"loss": 0.6595, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.433444784407679e-06, |
|
"loss": 0.7049, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.395407197565518e-06, |
|
"loss": 0.6673, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.357429458304135e-06, |
|
"loss": 0.668, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.319512197173479e-06, |
|
"loss": 0.6426, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.2816560437193786e-06, |
|
"loss": 0.6404, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.243861626473073e-06, |
|
"loss": 0.6102, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 6.206129572940798e-06, |
|
"loss": 0.6502, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.168460509593343e-06, |
|
"loss": 0.6352, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.1308550618556765e-06, |
|
"loss": 0.6234, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.093313854096534e-06, |
|
"loss": 0.6304, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.0558375096180724e-06, |
|
"loss": 0.6817, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.0184266506455125e-06, |
|
"loss": 0.6517, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.981081898316809e-06, |
|
"loss": 0.639, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.943803872672344e-06, |
|
"loss": 0.681, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.906593192644615e-06, |
|
"loss": 0.6147, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.869450476047984e-06, |
|
"loss": 0.6364, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.832376339568394e-06, |
|
"loss": 0.6765, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.795371398753153e-06, |
|
"loss": 0.658, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.758436268000696e-06, |
|
"loss": 0.6379, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.72157156055039e-06, |
|
"loss": 0.6504, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.684777888472359e-06, |
|
"loss": 0.6907, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.648055862657321e-06, |
|
"loss": 0.6411, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.611406092806429e-06, |
|
"loss": 0.656, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.574829187421166e-06, |
|
"loss": 0.641, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.538325753793227e-06, |
|
"loss": 0.6668, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.501896397994465e-06, |
|
"loss": 0.6293, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.465541724866787e-06, |
|
"loss": 0.6394, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.429262338012144e-06, |
|
"loss": 0.6625, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.393058839782492e-06, |
|
"loss": 0.6709, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.356931831269798e-06, |
|
"loss": 0.5811, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.3208819122960674e-06, |
|
"loss": 0.5701, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.284909681403367e-06, |
|
"loss": 0.5856, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.249015735843901e-06, |
|
"loss": 0.5834, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.213200671570085e-06, |
|
"loss": 0.5851, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.177465083224671e-06, |
|
"loss": 0.5928, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.141809564130847e-06, |
|
"loss": 0.6236, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.106234706282405e-06, |
|
"loss": 0.5716, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.070741100333901e-06, |
|
"loss": 0.5997, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.035329335590868e-06, |
|
"loss": 0.6034, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.5754, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.964753680139425e-06, |
|
"loss": 0.5561, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.9295909612089265e-06, |
|
"loss": 0.6011, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.894512427020275e-06, |
|
"loss": 0.5922, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.859518659987491e-06, |
|
"loss": 0.6, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.824610241117198e-06, |
|
"loss": 0.6257, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.789787749998979e-06, |
|
"loss": 0.5812, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.755051764795737e-06, |
|
"loss": 0.5979, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.720402862234105e-06, |
|
"loss": 0.6245, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.6858416175948795e-06, |
|
"loss": 0.6298, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.651368604703449e-06, |
|
"loss": 0.5722, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.616984395920282e-06, |
|
"loss": 0.6015, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.582689562131416e-06, |
|
"loss": 0.6211, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.548484672738992e-06, |
|
"loss": 0.638, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.514370295651781e-06, |
|
"loss": 0.5722, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.480346997275767e-06, |
|
"loss": 0.5818, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.446415342504742e-06, |
|
"loss": 0.5717, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.412575894710931e-06, |
|
"loss": 0.551, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.378829215735624e-06, |
|
"loss": 0.5558, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.345175865879863e-06, |
|
"loss": 0.5926, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.311616403895126e-06, |
|
"loss": 0.5676, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.27815138697407e-06, |
|
"loss": 0.6067, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.244781370741254e-06, |
|
"loss": 0.5916, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.211506909243932e-06, |
|
"loss": 0.595, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.17832855494285e-06, |
|
"loss": 0.6141, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.145246858703064e-06, |
|
"loss": 0.6039, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.1122623697848164e-06, |
|
"loss": 0.6137, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.079375635834392e-06, |
|
"loss": 0.6145, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.046587202875036e-06, |
|
"loss": 0.5684, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.013897615297889e-06, |
|
"loss": 0.5587, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.981307415852949e-06, |
|
"loss": 0.5996, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.948817145640062e-06, |
|
"loss": 0.5766, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.916427344099928e-06, |
|
"loss": 0.5676, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.884138549005149e-06, |
|
"loss": 0.5692, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.851951296451305e-06, |
|
"loss": 0.5753, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.819866120848058e-06, |
|
"loss": 0.5462, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.7878835549102623e-06, |
|
"loss": 0.5613, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.756004129649129e-06, |
|
"loss": 0.5663, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.72422837436341e-06, |
|
"loss": 0.5799, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.6925568166306182e-06, |
|
"loss": 0.5639, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.660989982298249e-06, |
|
"loss": 0.5945, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.6295283954750626e-06, |
|
"loss": 0.5932, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.598172578522382e-06, |
|
"loss": 0.5985, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.5669230520454114e-06, |
|
"loss": 0.5812, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.5357803348846087e-06, |
|
"loss": 0.5743, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.504744944107055e-06, |
|
"loss": 0.5741, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.4738173949978737e-06, |
|
"loss": 0.6024, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.442998201051677e-06, |
|
"loss": 0.5778, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.412287873964052e-06, |
|
"loss": 0.5701, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.3816869236230387e-06, |
|
"loss": 0.6132, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.3511958581006874e-06, |
|
"loss": 0.5816, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.320815183644608e-06, |
|
"loss": 0.5802, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.2905454046695817e-06, |
|
"loss": 0.5927, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.2603870237491653e-06, |
|
"loss": 0.5895, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.2303405416073673e-06, |
|
"loss": 0.5768, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.2004064571103154e-06, |
|
"loss": 0.583, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.1705852672579853e-06, |
|
"loss": 0.5778, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.1408774671759524e-06, |
|
"loss": 0.5905, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.111283550107158e-06, |
|
"loss": 0.6088, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.0818040074037303e-06, |
|
"loss": 0.6004, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.052439328518817e-06, |
|
"loss": 0.597, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.0231900009984793e-06, |
|
"loss": 0.5485, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.994056510473571e-06, |
|
"loss": 0.5902, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.9650393406516885e-06, |
|
"loss": 0.5635, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.9361389733091416e-06, |
|
"loss": 0.5791, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.907355888282948e-06, |
|
"loss": 0.5661, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.878690563462877e-06, |
|
"loss": 0.596, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.8501434747835012e-06, |
|
"loss": 0.561, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.8217150962163044e-06, |
|
"loss": 0.5744, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.793405899761803e-06, |
|
"loss": 0.6, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.7652163554417276e-06, |
|
"loss": 0.6088, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.737146931291198e-06, |
|
"loss": 0.5956, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.709198093350961e-06, |
|
"loss": 0.5821, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.6813703056596528e-06, |
|
"loss": 0.5486, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.6536640302461036e-06, |
|
"loss": 0.5498, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.6260797271216475e-06, |
|
"loss": 0.5392, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.5986178542724983e-06, |
|
"loss": 0.5426, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.5712788676521382e-06, |
|
"loss": 0.5824, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.544063221173758e-06, |
|
"loss": 0.5935, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.5169713667027162e-06, |
|
"loss": 0.5529, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.490003754049024e-06, |
|
"loss": 0.5908, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.463160830959894e-06, |
|
"loss": 0.5749, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.4364430431122997e-06, |
|
"loss": 0.5887, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.4098508341055714e-06, |
|
"loss": 0.6168, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.3833846454540457e-06, |
|
"loss": 0.5605, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.357044916579714e-06, |
|
"loss": 0.6113, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.3308320848049436e-06, |
|
"loss": 0.5886, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.3047465853452058e-06, |
|
"loss": 0.6024, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.278788851301862e-06, |
|
"loss": 0.5963, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.2529593136549623e-06, |
|
"loss": 0.5684, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.2272584012560894e-06, |
|
"loss": 0.5925, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.201686540821245e-06, |
|
"loss": 0.5529, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.176244156923768e-06, |
|
"loss": 0.5924, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.1509316719872718e-06, |
|
"loss": 0.5543, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.1257495062786404e-06, |
|
"loss": 0.5518, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.100698077901049e-06, |
|
"loss": 0.5693, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0757778027870192e-06, |
|
"loss": 0.5525, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0509890946915244e-06, |
|
"loss": 0.5783, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.026332365185102e-06, |
|
"loss": 0.5564, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.001808023647035e-06, |
|
"loss": 0.5712, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.9774164772585435e-06, |
|
"loss": 0.5682, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.953158130996039e-06, |
|
"loss": 0.533, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9290333876243892e-06, |
|
"loss": 0.5506, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9050426476902296e-06, |
|
"loss": 0.6095, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.8811863095153182e-06, |
|
"loss": 0.5872, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.8574647691899217e-06, |
|
"loss": 0.5962, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.833878420566243e-06, |
|
"loss": 0.5853, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8104276552518696e-06, |
|
"loss": 0.616, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.7871128626032851e-06, |
|
"loss": 0.5858, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.7639344297193927e-06, |
|
"loss": 0.5746, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.7408927414351051e-06, |
|
"loss": 0.5594, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.7179881803149355e-06, |
|
"loss": 0.5367, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.695221126646659e-06, |
|
"loss": 0.5795, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.6725919584349882e-06, |
|
"loss": 0.5415, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.650101051395313e-06, |
|
"loss": 0.5419, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.6277487789474477e-06, |
|
"loss": 0.5623, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.6055355122094352e-06, |
|
"loss": 0.555, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.5834616199913877e-06, |
|
"loss": 0.5766, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.5615274687893588e-06, |
|
"loss": 0.5518, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.5397334227792692e-06, |
|
"loss": 0.5426, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.5180798438108468e-06, |
|
"loss": 0.5697, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.496567091401625e-06, |
|
"loss": 0.5884, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.4751955227309722e-06, |
|
"loss": 0.5664, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.4539654926341695e-06, |
|
"loss": 0.5626, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.4328773535965045e-06, |
|
"loss": 0.5791, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.4119314557474272e-06, |
|
"loss": 0.5769, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.3911281468547432e-06, |
|
"loss": 0.5395, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.3704677723188254e-06, |
|
"loss": 0.5463, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.3499506751668933e-06, |
|
"loss": 0.5307, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.3295771960473057e-06, |
|
"loss": 0.5629, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.3093476732239129e-06, |
|
"loss": 0.5902, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.2892624425704324e-06, |
|
"loss": 0.5644, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.2693218375648886e-06, |
|
"loss": 0.5903, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.249526189284057e-06, |
|
"loss": 0.5831, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.229875826397976e-06, |
|
"loss": 0.5983, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.2103710751644916e-06, |
|
"loss": 0.554, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.191012259423835e-06, |
|
"loss": 0.5906, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.1717997005932546e-06, |
|
"loss": 0.5605, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.1527337176616704e-06, |
|
"loss": 0.5725, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.133814627184382e-06, |
|
"loss": 0.5659, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.1150427432778078e-06, |
|
"loss": 0.552, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.0964183776142834e-06, |
|
"loss": 0.5884, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.0779418394168695e-06, |
|
"loss": 0.5384, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.0596134354542287e-06, |
|
"loss": 0.5287, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.0414334700355277e-06, |
|
"loss": 0.5616, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.0234022450053916e-06, |
|
"loss": 0.5769, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.0055200597388793e-06, |
|
"loss": 0.5718, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.877872111365273e-07, |
|
"loss": 0.5537, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.702039936194075e-07, |
|
"loss": 0.5825, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.527706991242502e-07, |
|
"loss": 0.5353, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.354876170985927e-07, |
|
"loss": 0.5586, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.18355034495968e-07, |
|
"loss": 0.5504, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.013732357711469e-07, |
|
"loss": 0.5497, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.845425028754151e-07, |
|
"loss": 0.5369, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.678631152518869e-07, |
|
"loss": 0.5419, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.513353498308741e-07, |
|
"loss": 0.5273, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.349594810252792e-07, |
|
"loss": 0.5392, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.187357807260432e-07, |
|
"loss": 0.5411, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.0266451829763e-07, |
|
"loss": 0.5713, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.867459605735606e-07, |
|
"loss": 0.5693, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.709803718519726e-07, |
|
"loss": 0.5477, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.553680138912378e-07, |
|
"loss": 0.5572, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.39909145905614e-07, |
|
"loss": 0.5072, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.246040245609465e-07, |
|
"loss": 0.5501, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.094529039704013e-07, |
|
"loss": 0.5528, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.944560356902452e-07, |
|
"loss": 0.5601, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.796136687156696e-07, |
|
"loss": 0.5736, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.649260494766674e-07, |
|
"loss": 0.5616, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.503934218339214e-07, |
|
"loss": 0.5281, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.36016027074774e-07, |
|
"loss": 0.5233, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.217941039092068e-07, |
|
"loss": 0.5144, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.077278884658888e-07, |
|
"loss": 0.5313, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.938176142882512e-07, |
|
"loss": 0.5633, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.800635123306053e-07, |
|
"loss": 0.5463, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.664658109543186e-07, |
|
"loss": 0.5405, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.530247359240093e-07, |
|
"loss": 0.5892, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.39740510403809e-07, |
|
"loss": 0.5868, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.266133549536578e-07, |
|
"loss": 0.546, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.136434875256335e-07, |
|
"loss": 0.5413, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.00831123460338e-07, |
|
"loss": 0.5368, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.881764754833229e-07, |
|
"loss": 0.5512, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.7567975370155894e-07, |
|
"loss": 0.5552, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.633411655999431e-07, |
|
"loss": 0.5737, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.511609160378549e-07, |
|
"loss": 0.5205, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.391392072457545e-07, |
|
"loss": 0.5367, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.272762388218332e-07, |
|
"loss": 0.5573, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.155722077286872e-07, |
|
"loss": 0.551, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.0402730829005254e-07, |
|
"loss": 0.5845, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.9264173218758083e-07, |
|
"loss": 0.557, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.8141566845765645e-07, |
|
"loss": 0.5567, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.703493034882544e-07, |
|
"loss": 0.5477, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.594428210158496e-07, |
|
"loss": 0.5048, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.4869640212236155e-07, |
|
"loss": 0.5243, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.3811022523215333e-07, |
|
"loss": 0.5233, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.2768446610906834e-07, |
|
"loss": 0.5494, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.174192978535051e-07, |
|
"loss": 0.581, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.073148908995538e-07, |
|
"loss": 0.5739, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.973714130121563e-07, |
|
"loss": 0.5195, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.8758902928432976e-07, |
|
"loss": 0.5319, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.779679021344206e-07, |
|
"loss": 0.5501, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.685081913034082e-07, |
|
"loss": 0.5707, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.592100538522524e-07, |
|
"loss": 0.5638, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.500736441592866e-07, |
|
"loss": 0.5396, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.410991139176566e-07, |
|
"loss": 0.5503, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.3228661213279824e-07, |
|
"loss": 0.5876, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.2363628511996692e-07, |
|
"loss": 0.551, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.1514827650180425e-07, |
|
"loss": 0.5742, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0682272720595642e-07, |
|
"loss": 0.5494, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9865977546273506e-07, |
|
"loss": 0.5381, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.9065955680282245e-07, |
|
"loss": 0.5296, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8282220405501428e-07, |
|
"loss": 0.5507, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.7514784734402267e-07, |
|
"loss": 0.5407, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6763661408831677e-07, |
|
"loss": 0.557, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6028862899799657e-07, |
|
"loss": 0.5233, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.5310401407273246e-07, |
|
"loss": 0.5374, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.460828885997334e-07, |
|
"loss": 0.5456, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.3922536915177533e-07, |
|
"loss": 0.5199, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.3253156958525136e-07, |
|
"loss": 0.5828, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.2600160103829584e-07, |
|
"loss": 0.5235, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.1963557192892772e-07, |
|
"loss": 0.5223, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.1343358795326109e-07, |
|
"loss": 0.5694, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.0739575208373987e-07, |
|
"loss": 0.5463, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.015221645674358e-07, |
|
"loss": 0.5422, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 9.581292292437982e-08, |
|
"loss": 0.5672, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 9.026812194594448e-08, |
|
"loss": 0.5477, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.488785369327068e-08, |
|
"loss": 0.5307, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.96722074957379e-08, |
|
"loss": 0.4877, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.462126994948304e-08, |
|
"loss": 0.5247, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 6.973512491595724e-08, |
|
"loss": 0.5749, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 6.501385352054246e-08, |
|
"loss": 0.5419, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 6.045753415119593e-08, |
|
"loss": 0.5818, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.606624245715453e-08, |
|
"loss": 0.5378, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.1840051347675777e-08, |
|
"loss": 0.5655, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.7779030990826594e-08, |
|
"loss": 0.5387, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.388324881232198e-08, |
|
"loss": 0.4912, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.015276949440261e-08, |
|
"loss": 0.5372, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.6587654974761246e-08, |
|
"loss": 0.5262, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.318796444551464e-08, |
|
"loss": 0.4982, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.995375435222214e-08, |
|
"loss": 0.5247, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.6885078392945295e-08, |
|
"loss": 0.5299, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.3981987517359696e-08, |
|
"loss": 0.566, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.1244529925906754e-08, |
|
"loss": 0.5199, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.8672751068995464e-08, |
|
"loss": 0.5561, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.6266693646245224e-08, |
|
"loss": 0.5584, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.402639760577862e-08, |
|
"loss": 0.5043, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.1951900143558626e-08, |
|
"loss": 0.5549, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.0043235702770215e-08, |
|
"loss": 0.5605, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.300435973246368e-09, |
|
"loss": 0.5288, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.723529890946268e-09, |
|
"loss": 0.5083, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.312543637471246e-09, |
|
"loss": 0.5909, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.067500639634014e-09, |
|
"loss": 0.5326, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.9884215690634265e-09, |
|
"loss": 0.5523, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.075324341868079e-09, |
|
"loss": 0.5396, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.328224118333221e-09, |
|
"loss": 0.5256, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.471333026742856e-10, |
|
"loss": 0.5419, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.320615428270557e-10, |
|
"loss": 0.5162, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.30157302877943e-11, |
|
"loss": 0.54, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.5478, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"step": 804, |
|
"total_flos": 4.313168111122514e+17, |
|
"train_loss": 0.6899483537866702, |
|
"train_runtime": 4131.3363, |
|
"train_samples_per_second": 49.831, |
|
"train_steps_per_second": 0.195 |
|
} |
|
], |
|
"max_steps": 804, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.313168111122514e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|