|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 278, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.9528, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.8803, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.8748, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.9064, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.8915, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.8656, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.8642, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.9196, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8783, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9999318037877998e-05, |
|
"loss": 0.8703, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9997272244526454e-05, |
|
"loss": 0.9208, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9993862898976092e-05, |
|
"loss": 0.912, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.998909046623581e-05, |
|
"loss": 0.8988, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9982955597229275e-05, |
|
"loss": 0.8873, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9975459128706155e-05, |
|
"loss": 0.8688, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.996660208312796e-05, |
|
"loss": 0.9077, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9956385668528614e-05, |
|
"loss": 0.8782, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9944811278349666e-05, |
|
"loss": 0.873, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9931880491250263e-05, |
|
"loss": 0.8998, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9917595070891796e-05, |
|
"loss": 0.9115, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9901956965697387e-05, |
|
"loss": 0.9197, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.988496830858612e-05, |
|
"loss": 0.898, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.986663141668212e-05, |
|
"loss": 0.8908, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9846948790998532e-05, |
|
"loss": 0.8601, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.982592311609639e-05, |
|
"loss": 0.859, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9803557259718472e-05, |
|
"loss": 0.9306, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.977985427239815e-05, |
|
"loss": 0.906, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.975481738704333e-05, |
|
"loss": 0.9386, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9728450018495506e-05, |
|
"loss": 0.8569, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9700755763064e-05, |
|
"loss": 0.8911, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.967173839803545e-05, |
|
"loss": 0.9246, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9641401881158625e-05, |
|
"loss": 0.8808, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.960975035010461e-05, |
|
"loss": 0.8474, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9576788121902457e-05, |
|
"loss": 0.8672, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.954251969235039e-05, |
|
"loss": 0.8729, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.950694973540259e-05, |
|
"loss": 0.8927, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9470083102531724e-05, |
|
"loss": 0.9035, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.943192482206723e-05, |
|
"loss": 0.9003, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9392480098509488e-05, |
|
"loss": 0.8605, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9351754311819978e-05, |
|
"loss": 0.9471, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9309753016687478e-05, |
|
"loss": 0.9558, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9266481941770463e-05, |
|
"loss": 0.9359, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9221946988915745e-05, |
|
"loss": 0.9561, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9176154232353513e-05, |
|
"loss": 0.9012, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9129109917868863e-05, |
|
"loss": 0.8935, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9080820461949886e-05, |
|
"loss": 0.8635, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9031292450912565e-05, |
|
"loss": 0.9522, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.898053264000239e-05, |
|
"loss": 0.9226, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8928547952473037e-05, |
|
"loss": 0.8975, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8875345478642067e-05, |
|
"loss": 0.8776, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8820932474923874e-05, |
|
"loss": 0.8925, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8765316362839955e-05, |
|
"loss": 0.8753, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8708504728006668e-05, |
|
"loss": 0.8859, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.865050531910062e-05, |
|
"loss": 0.9087, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8591326046801813e-05, |
|
"loss": 0.8739, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8530974982714667e-05, |
|
"loss": 0.9321, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8469460358267127e-05, |
|
"loss": 0.8945, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8406790563587958e-05, |
|
"loss": 0.9255, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8342974146362397e-05, |
|
"loss": 0.8816, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8278019810666295e-05, |
|
"loss": 0.8863, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8211936415778986e-05, |
|
"loss": 0.9121, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8144732974974902e-05, |
|
"loss": 0.8759, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8076418654294267e-05, |
|
"loss": 0.9008, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.80070027712929e-05, |
|
"loss": 0.9176, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.793649479377137e-05, |
|
"loss": 0.8681, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7864904338483676e-05, |
|
"loss": 0.915, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.779224116982558e-05, |
|
"loss": 0.8937, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7718515198502816e-05, |
|
"loss": 0.8827, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7643736480179353e-05, |
|
"loss": 0.8496, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7567915214105883e-05, |
|
"loss": 0.9188, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7491061741728703e-05, |
|
"loss": 0.8845, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.741318654527923e-05, |
|
"loss": 0.863, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7334300246344318e-05, |
|
"loss": 0.9035, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.725441360441752e-05, |
|
"loss": 0.8462, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7173537515431612e-05, |
|
"loss": 0.8881, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7091683010272447e-05, |
|
"loss": 0.8944, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.700886125327443e-05, |
|
"loss": 0.9079, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.692508354069779e-05, |
|
"loss": 0.8947, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.684036129918786e-05, |
|
"loss": 0.8519, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6754706084216556e-05, |
|
"loss": 0.9102, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6668129578506315e-05, |
|
"loss": 0.9016, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.658064359043664e-05, |
|
"loss": 0.9281, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6492260052433554e-05, |
|
"loss": 0.9072, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6402991019342073e-05, |
|
"loss": 0.9166, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.631284866678205e-05, |
|
"loss": 0.8633, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6221845289487493e-05, |
|
"loss": 0.9126, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6129993299629652e-05, |
|
"loss": 0.9278, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.6037305225124122e-05, |
|
"loss": 0.895, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5943793707922086e-05, |
|
"loss": 0.909, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.5849471502286088e-05, |
|
"loss": 0.8707, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5754351473050434e-05, |
|
"loss": 0.9124, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5658446593866517e-05, |
|
"loss": 0.9002, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5561769945433326e-05, |
|
"loss": 0.8842, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5464334713713312e-05, |
|
"loss": 0.8894, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5366154188133962e-05, |
|
"loss": 0.9092, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.526724175977518e-05, |
|
"loss": 0.907, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5167610919542885e-05, |
|
"loss": 0.975, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5067275256328913e-05, |
|
"loss": 0.919, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4966248455157622e-05, |
|
"loss": 0.8805, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4864544295319357e-05, |
|
"loss": 0.8917, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4762176648491052e-05, |
|
"loss": 0.8866, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4659159476844231e-05, |
|
"loss": 0.8638, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4555506831140698e-05, |
|
"loss": 0.8955, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.445123284881609e-05, |
|
"loss": 0.9303, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4346351752051663e-05, |
|
"loss": 0.8765, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4240877845834473e-05, |
|
"loss": 0.8824, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4134825516006307e-05, |
|
"loss": 0.8933, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.4028209227301534e-05, |
|
"loss": 0.8633, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.392104352137426e-05, |
|
"loss": 0.8933, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3813343014814926e-05, |
|
"loss": 0.8914, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3705122397156727e-05, |
|
"loss": 0.8869, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.359639642887208e-05, |
|
"loss": 0.8688, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3487179939359394e-05, |
|
"loss": 0.9112, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3377487824920459e-05, |
|
"loss": 0.8622, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.32673350467287e-05, |
|
"loss": 0.9195, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3156736628788585e-05, |
|
"loss": 0.9125, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.304570765588648e-05, |
|
"loss": 0.8975, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.293426327153317e-05, |
|
"loss": 0.8466, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2822418675898428e-05, |
|
"loss": 0.9109, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2710189123737804e-05, |
|
"loss": 0.8949, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2597589922312009e-05, |
|
"loss": 0.8578, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2484636429299113e-05, |
|
"loss": 0.8887, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2371344050699872e-05, |
|
"loss": 0.8935, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2257728238736468e-05, |
|
"loss": 0.8413, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2143804489744941e-05, |
|
"loss": 0.8918, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2029588342061623e-05, |
|
"loss": 0.8996, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1915095373903789e-05, |
|
"loss": 0.8716, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1800341201244954e-05, |
|
"loss": 0.8695, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1685341475684935e-05, |
|
"loss": 0.9327, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.15701118823151e-05, |
|
"loss": 0.8849, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1454668137579059e-05, |
|
"loss": 0.8831, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1339025987129033e-05, |
|
"loss": 0.8848, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1223201203678289e-05, |
|
"loss": 0.8591, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1107209584849845e-05, |
|
"loss": 0.8592, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0991066951021802e-05, |
|
"loss": 0.8658, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0874789143169569e-05, |
|
"loss": 0.9167, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0758392020705258e-05, |
|
"loss": 0.8967, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0641891459314598e-05, |
|
"loss": 0.9196, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0525303348791599e-05, |
|
"loss": 0.8851, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0408643590871312e-05, |
|
"loss": 0.8484, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.029192809706095e-05, |
|
"loss": 0.9097, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.017517278646968e-05, |
|
"loss": 0.9015, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0058393583637376e-05, |
|
"loss": 0.8429, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.94160641636263e-06, |
|
"loss": 0.8631, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.824827213530323e-06, |
|
"loss": 0.8986, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.708071902939053e-06, |
|
"loss": 0.897, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.591356409128691e-06, |
|
"loss": 0.8773, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.474696651208406e-06, |
|
"loss": 0.8805, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.358108540685406e-06, |
|
"loss": 0.8887, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.241607979294745e-06, |
|
"loss": 0.8689, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.125210856830433e-06, |
|
"loss": 0.8726, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.0089330489782e-06, |
|
"loss": 0.9044, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.892790415150161e-06, |
|
"loss": 0.8903, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.776798796321715e-06, |
|
"loss": 0.8569, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.66097401287097e-06, |
|
"loss": 0.8636, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.545331862420945e-06, |
|
"loss": 0.9027, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.429888117684904e-06, |
|
"loss": 0.8581, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.314658524315068e-06, |
|
"loss": 0.8882, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.199658798755048e-06, |
|
"loss": 0.8915, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.084904626096211e-06, |
|
"loss": 0.9091, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.970411657938382e-06, |
|
"loss": 0.8708, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.856195510255059e-06, |
|
"loss": 0.8687, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.742271761263537e-06, |
|
"loss": 0.8653, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.628655949300133e-06, |
|
"loss": 0.8938, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.51536357070089e-06, |
|
"loss": 0.8943, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.402410077687994e-06, |
|
"loss": 0.871, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.2898108762622e-06, |
|
"loss": 0.8611, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.1775813241015755e-06, |
|
"loss": 0.9223, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.065736728466832e-06, |
|
"loss": 0.8372, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.9542923441135226e-06, |
|
"loss": 0.8493, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.843263371211415e-06, |
|
"loss": 0.8764, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.732664953271305e-06, |
|
"loss": 0.8427, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.622512175079543e-06, |
|
"loss": 0.9206, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.512820060640608e-06, |
|
"loss": 0.9046, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.403603571127921e-06, |
|
"loss": 0.9025, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.294877602843276e-06, |
|
"loss": 0.8967, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.186656985185078e-06, |
|
"loss": 0.8848, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.078956478625743e-06, |
|
"loss": 0.906, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5.971790772698467e-06, |
|
"loss": 0.918, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.865174483993697e-06, |
|
"loss": 0.893, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.759122154165528e-06, |
|
"loss": 0.9007, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.653648247948342e-06, |
|
"loss": 0.8744, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.548767151183912e-06, |
|
"loss": 0.917, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.444493168859304e-06, |
|
"loss": 0.8773, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.340840523155769e-06, |
|
"loss": 0.9227, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.237823351508953e-06, |
|
"loss": 0.8372, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.135455704680646e-06, |
|
"loss": 0.9047, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.03375154484238e-06, |
|
"loss": 0.8698, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.932724743671089e-06, |
|
"loss": 0.8616, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.832389080457118e-06, |
|
"loss": 0.9023, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.732758240224819e-06, |
|
"loss": 0.8613, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.633845811866044e-06, |
|
"loss": 0.8543, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.535665286286691e-06, |
|
"loss": 0.9005, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.438230054566678e-06, |
|
"loss": 0.9237, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.34155340613348e-06, |
|
"loss": 0.908, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.245648526949568e-06, |
|
"loss": 0.8667, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.150528497713911e-06, |
|
"loss": 0.8766, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.056206292077916e-06, |
|
"loss": 0.8879, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.96269477487588e-06, |
|
"loss": 0.8712, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.870006700370348e-06, |
|
"loss": 0.8465, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.778154710512513e-06, |
|
"loss": 0.9037, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.687151333217952e-06, |
|
"loss": 0.8617, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.597008980657929e-06, |
|
"loss": 0.8778, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5077399475664474e-06, |
|
"loss": 0.8629, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.419356409563361e-06, |
|
"loss": 0.8194, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.331870421493688e-06, |
|
"loss": 0.8806, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.245293915783444e-06, |
|
"loss": 0.8949, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.1596387008121386e-06, |
|
"loss": 0.8451, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.074916459302211e-06, |
|
"loss": 0.8941, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.9911387467255737e-06, |
|
"loss": 0.8887, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.9083169897275554e-06, |
|
"loss": 0.8624, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.82646248456839e-06, |
|
"loss": 0.8769, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.745586395582481e-06, |
|
"loss": 0.87, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.665699753655684e-06, |
|
"loss": 0.8524, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.586813454720771e-06, |
|
"loss": 0.8492, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.5089382582712995e-06, |
|
"loss": 0.8835, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4320847858941167e-06, |
|
"loss": 0.8711, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.3562635198206476e-06, |
|
"loss": 0.8955, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.281484801497186e-06, |
|
"loss": 0.8767, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2077588301744234e-06, |
|
"loss": 0.8499, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.1350956615163254e-06, |
|
"loss": 0.835, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.0635052062286323e-06, |
|
"loss": 0.8427, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.992997228707103e-06, |
|
"loss": 0.8295, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.923581345705736e-06, |
|
"loss": 0.8669, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8552670250251003e-06, |
|
"loss": 0.8733, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.788063584221017e-06, |
|
"loss": 0.862, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7219801893337073e-06, |
|
"loss": 0.8726, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6570258536376083e-06, |
|
"loss": 0.8486, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5932094364120453e-06, |
|
"loss": 0.8599, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5305396417328755e-06, |
|
"loss": 0.8423, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.469025017285335e-06, |
|
"loss": 0.8835, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4086739531981886e-06, |
|
"loss": 0.9035, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3494946808993804e-06, |
|
"loss": 0.8399, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.291495271993337e-06, |
|
"loss": 0.8797, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.234683637160048e-06, |
|
"loss": 0.8579, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1790675250761263e-06, |
|
"loss": 0.8749, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.124654521357934e-06, |
|
"loss": 0.8661, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.0714520475269653e-06, |
|
"loss": 0.9152, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.0194673599976134e-06, |
|
"loss": 0.8602, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.687075490874376e-07, |
|
"loss": 0.8802, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.191795380501133e-07, |
|
"loss": 0.8678, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.708900821311405e-07, |
|
"loss": 0.8902, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.238457676464873e-07, |
|
"loss": 0.8982, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.780530110842566e-07, |
|
"loss": 0.8694, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.335180582295387e-07, |
|
"loss": 0.8896, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.902469833125236e-07, |
|
"loss": 0.8869, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.482456881800248e-07, |
|
"loss": 0.9181, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.075199014905153e-07, |
|
"loss": 0.814, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.680751779327742e-07, |
|
"loss": 0.885, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.299168974682789e-07, |
|
"loss": 0.8642, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.930502645974122e-07, |
|
"loss": 0.8697, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.574803076496148e-07, |
|
"loss": 0.8614, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.232118780975447e-07, |
|
"loss": 0.8606, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.9024964989539227e-07, |
|
"loss": 0.849, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.585981188413767e-07, |
|
"loss": 0.8665, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.2826160196455124e-07, |
|
"loss": 0.8916, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.9924423693600157e-07, |
|
"loss": 0.846, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7154998150449643e-07, |
|
"loss": 0.8738, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.4518261295667255e-07, |
|
"loss": 0.8699, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.201457276018526e-07, |
|
"loss": 0.8869, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.9644274028152944e-07, |
|
"loss": 0.8696, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.740768839036111e-07, |
|
"loss": 0.8931, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5305120900146908e-07, |
|
"loss": 0.887, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3336858331787993e-07, |
|
"loss": 0.8705, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1503169141388049e-07, |
|
"loss": 0.8813, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.804303430261175e-08, |
|
"loss": 0.8476, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.240492910820407e-08, |
|
"loss": 0.8973, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.811950874973994e-08, |
|
"loss": 0.8578, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.518872165033329e-08, |
|
"loss": 0.8851, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.361433147138772e-08, |
|
"loss": 0.8397, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.339791687203997e-08, |
|
"loss": 0.8525, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.4540871293845526e-08, |
|
"loss": 0.8295, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.7044402770725055e-08, |
|
"loss": 0.8433, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0909533764194013e-08, |
|
"loss": 0.8829, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.137101023910852e-09, |
|
"loss": 0.8685, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7277554735449797e-09, |
|
"loss": 0.8855, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.819621220033323e-10, |
|
"loss": 0.8618, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.8748, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 278, |
|
"total_flos": 4.5714113651172966e+17, |
|
"train_loss": 0.8846922922048638, |
|
"train_runtime": 5115.5251, |
|
"train_samples_per_second": 17.336, |
|
"train_steps_per_second": 0.054 |
|
} |
|
], |
|
"max_steps": 278, |
|
"num_train_epochs": 1, |
|
"total_flos": 4.5714113651172966e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|