|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.629735935706085, |
|
"eval_steps": 500, |
|
"global_step": 1378, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019999989520267283, |
|
"loss": 1.377, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999958081091102, |
|
"loss": 1.2613, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999905682537348, |
|
"loss": 1.1324, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999832324715846, |
|
"loss": 1.0451, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999738007780348, |
|
"loss": 1.0011, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001999962273192854, |
|
"loss": 0.9464, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999486497402038, |
|
"loss": 0.9247, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019999329304486377, |
|
"loss": 0.8769, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019999151153511023, |
|
"loss": 0.8683, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019998952044849376, |
|
"loss": 0.8325, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019998731978918756, |
|
"loss": 0.7987, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019998490956180405, |
|
"loss": 0.7877, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000199982289771395, |
|
"loss": 0.7496, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019997946042345127, |
|
"loss": 0.7487, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019997642152390314, |
|
"loss": 0.7427, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001999731730791199, |
|
"loss": 0.718, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019996971509591012, |
|
"loss": 0.7124, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019996604758152158, |
|
"loss": 0.7083, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019996217054364115, |
|
"loss": 0.6978, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019995808399039496, |
|
"loss": 0.6929, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019995378793034814, |
|
"loss": 0.6645, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000199949282372505, |
|
"loss": 0.6619, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019994456732630903, |
|
"loss": 0.666, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019993964280164264, |
|
"loss": 0.6554, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019993450880882735, |
|
"loss": 0.6517, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019992916535862385, |
|
"loss": 0.6453, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019992361246223158, |
|
"loss": 0.644, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019991785013128923, |
|
"loss": 0.6352, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001999118783778743, |
|
"loss": 0.6269, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019990569721450326, |
|
"loss": 0.6274, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019989930665413147, |
|
"loss": 0.6236, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019989270671015325, |
|
"loss": 0.6175, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019988589739640172, |
|
"loss": 0.6203, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019987887872714878, |
|
"loss": 0.6084, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019987165071710527, |
|
"loss": 0.6063, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001998642133814207, |
|
"loss": 0.6059, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019985656673568328, |
|
"loss": 0.6011, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019984871079591998, |
|
"loss": 0.6, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019984064557859648, |
|
"loss": 0.6012, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019983237110061697, |
|
"loss": 0.5957, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001998238873793244, |
|
"loss": 0.6014, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001998151944325001, |
|
"loss": 0.5976, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001998062922783641, |
|
"loss": 0.5934, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001997971809355748, |
|
"loss": 0.5947, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001997878604232291, |
|
"loss": 0.5852, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019977833076086229, |
|
"loss": 0.5919, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019976859196844804, |
|
"loss": 0.5752, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019975864406639833, |
|
"loss": 0.5744, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019974848707556345, |
|
"loss": 0.5782, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019973812101723188, |
|
"loss": 0.5759, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019972754591313034, |
|
"loss": 0.5714, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019971676178542368, |
|
"loss": 0.5622, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019970576865671488, |
|
"loss": 0.5687, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001996945665500449, |
|
"loss": 0.5712, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001996831554888928, |
|
"loss": 0.5655, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019967153549717553, |
|
"loss": 0.5612, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.000199659706599248, |
|
"loss": 0.5619, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001996476688199029, |
|
"loss": 0.5624, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001996354221843708, |
|
"loss": 0.5701, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019962296671832003, |
|
"loss": 0.5636, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001996103024478565, |
|
"loss": 0.5652, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019959742939952392, |
|
"loss": 0.5652, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019958434760030346, |
|
"loss": 0.5581, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001995710570776139, |
|
"loss": 0.5585, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019955755785931145, |
|
"loss": 0.5539, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019954384997368976, |
|
"loss": 0.5562, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019952993344947984, |
|
"loss": 0.5521, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019951580831584993, |
|
"loss": 0.5522, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001995014746024056, |
|
"loss": 0.5546, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019948693233918952, |
|
"loss": 0.5513, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019947218155668152, |
|
"loss": 0.5574, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019945722228579846, |
|
"loss": 0.5457, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019944205455789415, |
|
"loss": 0.5487, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019942667840475931, |
|
"loss": 0.5464, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001994110938586216, |
|
"loss": 0.5435, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019939530095214536, |
|
"loss": 0.5516, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019937929971843165, |
|
"loss": 0.5482, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019936309019101826, |
|
"loss": 0.542, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019934667240387944, |
|
"loss": 0.5375, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019933004639142605, |
|
"loss": 0.5394, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001993132121885053, |
|
"loss": 0.5413, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019929616983040073, |
|
"loss": 0.5382, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019927891935283225, |
|
"loss": 0.5352, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019926146079195594, |
|
"loss": 0.5415, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019924379418436404, |
|
"loss": 0.5315, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019922591956708478, |
|
"loss": 0.5352, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001992078369775824, |
|
"loss": 0.5491, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019918954645375706, |
|
"loss": 0.5348, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019917104803394468, |
|
"loss": 0.534, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000199152341756917, |
|
"loss": 0.5417, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019913342766188138, |
|
"loss": 0.5326, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019911430578848074, |
|
"loss": 0.5308, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019909497617679348, |
|
"loss": 0.5311, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019907543886733348, |
|
"loss": 0.5348, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019905569390104986, |
|
"loss": 0.5337, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019903574131932702, |
|
"loss": 0.5378, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019901558116398448, |
|
"loss": 0.5289, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001989952134772769, |
|
"loss": 0.529, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019897463830189388, |
|
"loss": 0.528, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019895385568095982, |
|
"loss": 0.5201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000198932865658034, |
|
"loss": 0.537, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019891166827711037, |
|
"loss": 0.5262, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019889026358261758, |
|
"loss": 0.5379, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019886865161941866, |
|
"loss": 0.531, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019884683243281116, |
|
"loss": 0.525, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001988248060685269, |
|
"loss": 0.5221, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019880257257273197, |
|
"loss": 0.5204, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019878013199202662, |
|
"loss": 0.5289, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001987574843734451, |
|
"loss": 0.518, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019873462976445553, |
|
"loss": 0.5253, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019871156821296007, |
|
"loss": 0.5255, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019868829976729443, |
|
"loss": 0.5276, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019866482447622802, |
|
"loss": 0.5192, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019864114238896384, |
|
"loss": 0.5165, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019861725355513823, |
|
"loss": 0.5185, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019859315802482096, |
|
"loss": 0.5205, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001985688558485149, |
|
"loss": 0.5187, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001985443470771562, |
|
"loss": 0.5214, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019851963176211387, |
|
"loss": 0.5195, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019849470995518992, |
|
"loss": 0.5286, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001984695817086191, |
|
"loss": 0.5184, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001984442470750689, |
|
"loss": 0.5131, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019841870610763938, |
|
"loss": 0.5225, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00019839295885986296, |
|
"loss": 0.51, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00019836700538570457, |
|
"loss": 0.5201, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00019834084573956128, |
|
"loss": 0.517, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001983144799762623, |
|
"loss": 0.5176, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00019828790815106887, |
|
"loss": 0.5255, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001982611303196741, |
|
"loss": 0.5221, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001982341465382029, |
|
"loss": 0.5107, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019820695686321185, |
|
"loss": 0.5196, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019817956135168908, |
|
"loss": 0.513, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019815196006105402, |
|
"loss": 0.5175, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001981241530491576, |
|
"loss": 0.5188, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019809614037428176, |
|
"loss": 0.5123, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019806792209513968, |
|
"loss": 0.5089, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001980394982708752, |
|
"loss": 0.5117, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.5121904611587524, |
|
"eval_runtime": 28.8865, |
|
"eval_samples_per_second": 6.924, |
|
"eval_steps_per_second": 0.865, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00019801086896106327, |
|
"loss": 0.5101, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019798203422570932, |
|
"loss": 0.5079, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019795299412524945, |
|
"loss": 0.5082, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019792374872055018, |
|
"loss": 0.5159, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019789429807290823, |
|
"loss": 0.5135, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019786464224405065, |
|
"loss": 0.5111, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019783478129613448, |
|
"loss": 0.51, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019780471529174664, |
|
"loss": 0.5014, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019777444429390387, |
|
"loss": 0.5169, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019774396836605255, |
|
"loss": 0.5144, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019771328757206864, |
|
"loss": 0.5075, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019768240197625742, |
|
"loss": 0.5074, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019765131164335345, |
|
"loss": 0.5056, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001976200166385204, |
|
"loss": 0.5025, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001975885170273509, |
|
"loss": 0.5089, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019755681287586652, |
|
"loss": 0.5077, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019752490425051743, |
|
"loss": 0.5185, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019749279121818235, |
|
"loss": 0.5096, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001974604738461686, |
|
"loss": 0.5089, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019742795220221155, |
|
"loss": 0.5075, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019739522635447488, |
|
"loss": 0.509, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019736229637155018, |
|
"loss": 0.5009, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.000197329162322457, |
|
"loss": 0.502, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0001972958242766425, |
|
"loss": 0.5106, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019726228230398146, |
|
"loss": 0.4997, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00019722853647477598, |
|
"loss": 0.4935, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001971945868597556, |
|
"loss": 0.5075, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00019716043353007693, |
|
"loss": 0.5019, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00019712607655732338, |
|
"loss": 0.5011, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00019709151601350546, |
|
"loss": 0.5019, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00019705675197106016, |
|
"loss": 0.5005, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001970217845028511, |
|
"loss": 0.5074, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00019698661368216817, |
|
"loss": 0.4961, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00019695123958272758, |
|
"loss": 0.499, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00019691566227867153, |
|
"loss": 0.4997, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00019687988184456814, |
|
"loss": 0.502, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00019684389835541129, |
|
"loss": 0.5003, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00019680771188662044, |
|
"loss": 0.4884, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001967713225140405, |
|
"loss": 0.4958, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001967347303139417, |
|
"loss": 0.4957, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00019669793536301926, |
|
"loss": 0.5016, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019666093773839345, |
|
"loss": 0.5025, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019662373751760934, |
|
"loss": 0.5035, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00019658633477863662, |
|
"loss": 0.5028, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00019654872959986937, |
|
"loss": 0.5011, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019651092206012603, |
|
"loss": 0.4915, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019647291223864928, |
|
"loss": 0.494, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001964347002151056, |
|
"loss": 0.5037, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00019639628606958533, |
|
"loss": 0.4965, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00019635766988260254, |
|
"loss": 0.4897, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001963188517350946, |
|
"loss": 0.4952, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00019627983170842234, |
|
"loss": 0.4903, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00019624060988436966, |
|
"loss": 0.4982, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00019620118634514335, |
|
"loss": 0.4999, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019616156117337305, |
|
"loss": 0.4917, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019612173445211106, |
|
"loss": 0.4878, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00019608170626483199, |
|
"loss": 0.4946, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00019604147669543282, |
|
"loss": 0.4968, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001960010458282326, |
|
"loss": 0.4891, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00019596041374797218, |
|
"loss": 0.4952, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00019591958053981432, |
|
"loss": 0.4928, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00019587854628934319, |
|
"loss": 0.4998, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001958373110825644, |
|
"loss": 0.496, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00019579587500590472, |
|
"loss": 0.4926, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00019575423814621198, |
|
"loss": 0.4939, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001957124005907548, |
|
"loss": 0.495, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00019567036242722249, |
|
"loss": 0.4962, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019562812374372473, |
|
"loss": 0.4931, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019558568462879158, |
|
"loss": 0.4843, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00019554304517137316, |
|
"loss": 0.4963, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00019550020546083949, |
|
"loss": 0.4979, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001954571655869803, |
|
"loss": 0.4925, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00019541392564000488, |
|
"loss": 0.4953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00019537048571054185, |
|
"loss": 0.4921, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.000195326845889639, |
|
"loss": 0.4917, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.000195283006268763, |
|
"loss": 0.4956, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00019523896693979936, |
|
"loss": 0.4901, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001951947279950522, |
|
"loss": 0.4865, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.000195150289527244, |
|
"loss": 0.4871, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 0.4948, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00019506081439542495, |
|
"loss": 0.4861, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00019501577791894922, |
|
"loss": 0.4852, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00019497054229448223, |
|
"loss": 0.4754, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00019492510761683537, |
|
"loss": 0.4892, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00019487947398123736, |
|
"loss": 0.4894, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00019483364148333384, |
|
"loss": 0.4906, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019478761021918728, |
|
"loss": 0.4932, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019474138028527675, |
|
"loss": 0.4863, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019469495177849768, |
|
"loss": 0.494, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00019464832479616182, |
|
"loss": 0.4893, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00019460149943599674, |
|
"loss": 0.4942, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00019455447579614594, |
|
"loss": 0.4893, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001945072539751685, |
|
"loss": 0.4855, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00019445983407203872, |
|
"loss": 0.4828, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00019441221618614628, |
|
"loss": 0.4928, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00019436440041729569, |
|
"loss": 0.483, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00019431638686570623, |
|
"loss": 0.4854, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00019426817563201177, |
|
"loss": 0.4906, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00019421976681726046, |
|
"loss": 0.4856, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00019417116052291458, |
|
"loss": 0.4872, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00019412235685085035, |
|
"loss": 0.4786, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001940733559033576, |
|
"loss": 0.49, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00019402415778313977, |
|
"loss": 0.4899, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00019397476259331344, |
|
"loss": 0.4839, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001939251704374083, |
|
"loss": 0.4817, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001938753814193669, |
|
"loss": 0.4818, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00019382539564354433, |
|
"loss": 0.4865, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00019377521321470805, |
|
"loss": 0.4935, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00019372483423803783, |
|
"loss": 0.4879, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00019367425881912525, |
|
"loss": 0.4833, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019362348706397373, |
|
"loss": 0.4782, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019357251907899814, |
|
"loss": 0.4805, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019352135497102463, |
|
"loss": 0.4881, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00019346999484729047, |
|
"loss": 0.4766, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00019341843881544372, |
|
"loss": 0.4799, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00019336668698354304, |
|
"loss": 0.4875, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00019331473946005754, |
|
"loss": 0.4834, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00019326259635386644, |
|
"loss": 0.4852, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00019321025777425892, |
|
"loss": 0.477, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0001931577238309338, |
|
"loss": 0.4849, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00019310499463399947, |
|
"loss": 0.4854, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00019305207029397348, |
|
"loss": 0.4869, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001929989509217824, |
|
"loss": 0.4772, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019294563662876165, |
|
"loss": 0.4867, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019289212752665507, |
|
"loss": 0.4806, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019283842372761493, |
|
"loss": 0.4839, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00019278452534420145, |
|
"loss": 0.4857, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00019273043248938288, |
|
"loss": 0.477, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00019267614527653488, |
|
"loss": 0.4868, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00019262166381944052, |
|
"loss": 0.485, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00019256698823229008, |
|
"loss": 0.485, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00019251211862968059, |
|
"loss": 0.4824, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001924570551266159, |
|
"loss": 0.4858, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00019240179783850612, |
|
"loss": 0.4793, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00019234634688116757, |
|
"loss": 0.4762, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00019229070237082252, |
|
"loss": 0.4808, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00019223486442409882, |
|
"loss": 0.4704, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00019217883315802991, |
|
"loss": 0.4733, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 0.4869104027748108, |
|
"eval_runtime": 28.3583, |
|
"eval_samples_per_second": 7.053, |
|
"eval_steps_per_second": 0.882, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00019212260869005428, |
|
"loss": 0.4777, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0001920661911380154, |
|
"loss": 0.4813, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00019200958062016144, |
|
"loss": 0.48, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0001919527772551451, |
|
"loss": 0.4831, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00019189578116202307, |
|
"loss": 0.4649, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0001918385924602562, |
|
"loss": 0.4728, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00019178121126970895, |
|
"loss": 0.4836, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0001917236377106492, |
|
"loss": 0.4833, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00019166587190374808, |
|
"loss": 0.4733, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00019160791397007957, |
|
"loss": 0.483, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001915497640311205, |
|
"loss": 0.4731, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001914914222087499, |
|
"loss": 0.4825, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00019143288862524926, |
|
"loss": 0.4756, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00019137416340330175, |
|
"loss": 0.4796, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00019131524666599233, |
|
"loss": 0.479, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00019125613853680727, |
|
"loss": 0.4773, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00019119683913963417, |
|
"loss": 0.4782, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00019113734859876126, |
|
"loss": 0.48, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00019107766703887764, |
|
"loss": 0.4789, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00019101779458507263, |
|
"loss": 0.4792, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00019095773136283567, |
|
"loss": 0.4819, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00019089747749805608, |
|
"loss": 0.4715, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00019083703311702279, |
|
"loss": 0.4677, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00019077639834642388, |
|
"loss": 0.4688, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00019071557331334669, |
|
"loss": 0.4726, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00019065455814527718, |
|
"loss": 0.4777, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00019059335297009992, |
|
"loss": 0.4722, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00019053195791609765, |
|
"loss": 0.4827, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00019047037311195118, |
|
"loss": 0.4705, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00019040859868673887, |
|
"loss": 0.4725, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00019034663476993668, |
|
"loss": 0.4715, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00019028448149141766, |
|
"loss": 0.4779, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00019022213898145176, |
|
"loss": 0.4637, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00019015960737070556, |
|
"loss": 0.4794, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 0.4749, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00019003397737151989, |
|
"loss": 0.4724, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0001899708792463942, |
|
"loss": 0.4665, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00018990759254711517, |
|
"loss": 0.4668, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0001898441174063283, |
|
"loss": 0.475, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00018978045395707418, |
|
"loss": 0.4661, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0001897166023327879, |
|
"loss": 0.4732, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00018965256266729913, |
|
"loss": 0.4749, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00018958833509483155, |
|
"loss": 0.4762, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00018952391975000268, |
|
"loss": 0.4732, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00018945931676782373, |
|
"loss": 0.4773, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00018939452628369898, |
|
"loss": 0.4759, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00018932954843342591, |
|
"loss": 0.4681, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00018926438335319462, |
|
"loss": 0.4693, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00018919903117958756, |
|
"loss": 0.4684, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001891334920495795, |
|
"loss": 0.4665, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00018906776610053686, |
|
"loss": 0.4683, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00018900185347021776, |
|
"loss": 0.4708, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00018893575429677157, |
|
"loss": 0.4659, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00018886946871873856, |
|
"loss": 0.466, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0001888029968750498, |
|
"loss": 0.4724, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00018873633890502674, |
|
"loss": 0.4686, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00018866949494838084, |
|
"loss": 0.469, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0001886024651452136, |
|
"loss": 0.4659, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.00018853524963601575, |
|
"loss": 0.4707, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001884678485616675, |
|
"loss": 0.4687, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00018840026206343784, |
|
"loss": 0.471, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00018833249028298455, |
|
"loss": 0.4644, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00018826453336235358, |
|
"loss": 0.4708, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.000188196391443979, |
|
"loss": 0.4664, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00018812806467068268, |
|
"loss": 0.4771, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0001880595531856738, |
|
"loss": 0.4685, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00018799085713254888, |
|
"loss": 0.4635, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0001879219766552911, |
|
"loss": 0.4751, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0001878529118982703, |
|
"loss": 0.469, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00018778366300624245, |
|
"loss": 0.4735, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001877142301243496, |
|
"loss": 0.4617, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00018764461339811935, |
|
"loss": 0.4664, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001875748129734646, |
|
"loss": 0.4717, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00018750482899668332, |
|
"loss": 0.4676, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00018743466161445823, |
|
"loss": 0.4665, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00018736431097385634, |
|
"loss": 0.4635, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0001872937772223289, |
|
"loss": 0.4676, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0001872230605077108, |
|
"loss": 0.4666, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00018715216097822058, |
|
"loss": 0.4673, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00018708107878245977, |
|
"loss": 0.4632, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00018700981406941298, |
|
"loss": 0.4659, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0001869383669884471, |
|
"loss": 0.4683, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00018686673768931154, |
|
"loss": 0.4715, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00018679492632213735, |
|
"loss": 0.4748, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00018672293303743738, |
|
"loss": 0.4596, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00018665075798610567, |
|
"loss": 0.4586, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0001865784013194173, |
|
"loss": 0.4609, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00018650586318902802, |
|
"loss": 0.467, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00018643314374697378, |
|
"loss": 0.4602, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00018636024314567067, |
|
"loss": 0.4709, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001862871615379145, |
|
"loss": 0.471, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00018621389907688037, |
|
"loss": 0.4605, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0001861404559161225, |
|
"loss": 0.468, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018606683220957383, |
|
"loss": 0.4632, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018599302811154572, |
|
"loss": 0.4633, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00018591904377672757, |
|
"loss": 0.4692, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00018584487936018661, |
|
"loss": 0.4693, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00018577053501736752, |
|
"loss": 0.4566, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0001856960109040921, |
|
"loss": 0.4679, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00018562130717655878, |
|
"loss": 0.4611, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0001855464239913427, |
|
"loss": 0.4654, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.000185471361505395, |
|
"loss": 0.4626, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00018539611987604258, |
|
"loss": 0.4667, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0001853206992609879, |
|
"loss": 0.4624, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00018524509981830852, |
|
"loss": 0.4602, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0001851693217064569, |
|
"loss": 0.4644, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00018509336508425986, |
|
"loss": 0.4664, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0001850172301109184, |
|
"loss": 0.4665, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00018494091694600738, |
|
"loss": 0.4629, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00018486442574947511, |
|
"loss": 0.4609, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00018478775668164305, |
|
"loss": 0.4593, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00018471090990320547, |
|
"loss": 0.4625, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0001846338855752291, |
|
"loss": 0.4573, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00018455668385915284, |
|
"loss": 0.4578, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00018447930491678733, |
|
"loss": 0.4649, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0001844017489103147, |
|
"loss": 0.4641, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00018432401600228823, |
|
"loss": 0.4545, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0001842461063556319, |
|
"loss": 0.4574, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00018416802013364016, |
|
"loss": 0.4604, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00018408975749997759, |
|
"loss": 0.4628, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00018401131861867846, |
|
"loss": 0.469, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0001839327036541465, |
|
"loss": 0.4679, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00018385391277115444, |
|
"loss": 0.464, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00018377494613484378, |
|
"loss": 0.4658, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00018369580391072433, |
|
"loss": 0.4631, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00018361648626467406, |
|
"loss": 0.4567, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0001835369933629384, |
|
"loss": 0.4617, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00018345732537213027, |
|
"loss": 0.4632, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00018337748245922955, |
|
"loss": 0.4609, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00018329746479158265, |
|
"loss": 0.4571, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0001832172725369024, |
|
"loss": 0.4662, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00018313690586326743, |
|
"loss": 0.4649, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00018305636493912202, |
|
"loss": 0.4582, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00018297564993327562, |
|
"loss": 0.4627, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00018289476101490256, |
|
"loss": 0.4547, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0001828136983535417, |
|
"loss": 0.4623, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00018273246211909604, |
|
"loss": 0.4549, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00018265105248183242, |
|
"loss": 0.4585, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.4760552644729614, |
|
"eval_runtime": 28.2621, |
|
"eval_samples_per_second": 7.077, |
|
"eval_steps_per_second": 0.885, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.000182569469612381, |
|
"loss": 0.4544, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00018248771368173524, |
|
"loss": 0.4522, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00018240578486125112, |
|
"loss": 0.4587, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00018232368332264708, |
|
"loss": 0.4565, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00018224140923800354, |
|
"loss": 0.4661, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0001821589627797626, |
|
"loss": 0.4553, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.00018207634412072764, |
|
"loss": 0.4566, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.00018199355343406296, |
|
"loss": 0.4582, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00018191059089329333, |
|
"loss": 0.4577, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00018182745667230394, |
|
"loss": 0.4609, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00018174415094533957, |
|
"loss": 0.4656, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0001816606738870046, |
|
"loss": 0.4561, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00018157702567226248, |
|
"loss": 0.4582, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00018149320647643541, |
|
"loss": 0.4552, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00018140921647520392, |
|
"loss": 0.4623, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00018132505584460658, |
|
"loss": 0.4602, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00018124072476103956, |
|
"loss": 0.4647, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00018115622340125631, |
|
"loss": 0.4606, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00018107155194236718, |
|
"loss": 0.4661, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.00018098671056183897, |
|
"loss": 0.4525, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.4534, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.00018081651874751327, |
|
"loss": 0.4552, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.00018073116867042862, |
|
"loss": 0.4536, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.00018064564938513012, |
|
"loss": 0.4575, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.00018055996107086157, |
|
"loss": 0.4505, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0001804741039072211, |
|
"loss": 0.4557, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00018038807807416068, |
|
"loss": 0.4523, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00018030188375198593, |
|
"loss": 0.4535, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00018021552112135552, |
|
"loss": 0.4579, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00018012899036328093, |
|
"loss": 0.4509, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00018004229165912596, |
|
"loss": 0.4583, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00017995542519060647, |
|
"loss": 0.4574, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.00017986839113978996, |
|
"loss": 0.451, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.00017978118968909508, |
|
"loss": 0.4511, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0001796938210212915, |
|
"loss": 0.4586, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00017960628531949927, |
|
"loss": 0.4465, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00017951858276718844, |
|
"loss": 0.4502, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00017943071354817897, |
|
"loss": 0.4553, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.00017934267784664002, |
|
"loss": 0.456, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.00017925447584708973, |
|
"loss": 0.448, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00017916610773439473, |
|
"loss": 0.4509, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00017907757369376985, |
|
"loss": 0.4544, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0001789888739107778, |
|
"loss": 0.4512, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00017890000857132853, |
|
"loss": 0.4561, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00017881097786167898, |
|
"loss": 0.462, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00017872178196843286, |
|
"loss": 0.4512, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00017863242107853995, |
|
"loss": 0.455, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.00017854289537929587, |
|
"loss": 0.4562, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.00017845320505834175, |
|
"loss": 0.4512, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.00017836335030366367, |
|
"loss": 0.4532, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.00017827333130359242, |
|
"loss": 0.4526, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.000178183148246803, |
|
"loss": 0.4487, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00017809280132231425, |
|
"loss": 0.449, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00017800229071948854, |
|
"loss": 0.4507, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00017791161662803124, |
|
"loss": 0.4473, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.0001778207792379904, |
|
"loss": 0.4518, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0001777297787397563, |
|
"loss": 0.452, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0001776386153240612, |
|
"loss": 0.4549, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.00017754728918197864, |
|
"loss": 0.4459, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.00017745580050492344, |
|
"loss": 0.4518, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00017736414948465087, |
|
"loss": 0.4495, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00017727233631325664, |
|
"loss": 0.4539, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0001771803611831762, |
|
"loss": 0.4498, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.00017708822428718458, |
|
"loss": 0.4586, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00017699592581839574, |
|
"loss": 0.4531, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00017690346597026233, |
|
"loss": 0.4536, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00017681084493657525, |
|
"loss": 0.4485, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00017671806291146325, |
|
"loss": 0.4497, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00017662512008939247, |
|
"loss": 0.448, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.00017653201666516615, |
|
"loss": 0.4465, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.00017643875283392406, |
|
"loss": 0.4516, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0001763453287911422, |
|
"loss": 0.4529, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00017625174473263235, |
|
"loss": 0.4552, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00017615800085454171, |
|
"loss": 0.4474, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00017606409735335246, |
|
"loss": 0.4542, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00017597003442588132, |
|
"loss": 0.4471, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0001758758122692791, |
|
"loss": 0.4474, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00017578143108103048, |
|
"loss": 0.4514, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0001756868910589533, |
|
"loss": 0.4539, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00017559219240119846, |
|
"loss": 0.4504, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00017549733530624928, |
|
"loss": 0.4444, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00017540231997292114, |
|
"loss": 0.4485, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.00017530714660036112, |
|
"loss": 0.4517, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.00017521181538804746, |
|
"loss": 0.4478, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00017511632653578936, |
|
"loss": 0.4464, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00017502068024372633, |
|
"loss": 0.4463, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00017492487671232784, |
|
"loss": 0.4464, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00017482891614239304, |
|
"loss": 0.4496, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.0001747327987350501, |
|
"loss": 0.4479, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.00017463652469175599, |
|
"loss": 0.444, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00017454009421429597, |
|
"loss": 0.4507, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00017444350750478316, |
|
"loss": 0.4502, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.0001743467647656581, |
|
"loss": 0.4535, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00017424986619968844, |
|
"loss": 0.444, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00017415281200996842, |
|
"loss": 0.4498, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00017405560239991833, |
|
"loss": 0.454, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00017395823757328444, |
|
"loss": 0.4479, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00017386071773413817, |
|
"loss": 0.4431, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00017376304308687587, |
|
"loss": 0.4455, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0001736652138362184, |
|
"loss": 0.4512, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.00017356723018721067, |
|
"loss": 0.4504, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00017346909234522109, |
|
"loss": 0.4422, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00017337080051594138, |
|
"loss": 0.4535, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00017327235490538598, |
|
"loss": 0.4457, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.00017317375571989158, |
|
"loss": 0.4519, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.00017307500316611686, |
|
"loss": 0.446, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00017297609745104184, |
|
"loss": 0.4501, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00017287703878196762, |
|
"loss": 0.4427, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0001727778273665159, |
|
"loss": 0.4429, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.00017267846341262848, |
|
"loss": 0.4423, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0001725789471285669, |
|
"loss": 0.4458, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.000172479278722912, |
|
"loss": 0.4452, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0001723794584045634, |
|
"loss": 0.4426, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00017227948638273916, |
|
"loss": 0.4484, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0001721793628669753, |
|
"loss": 0.4431, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00017207908806712535, |
|
"loss": 0.4424, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0001719786621933599, |
|
"loss": 0.4469, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.00017187808545616627, |
|
"loss": 0.4492, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.00017177735806634789, |
|
"loss": 0.4464, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.00017167648023502395, |
|
"loss": 0.4395, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.0001715754521736291, |
|
"loss": 0.4483, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00017147427409391265, |
|
"loss": 0.4494, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00017137294620793848, |
|
"loss": 0.4474, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.00017127146872808447, |
|
"loss": 0.4502, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.00017116984186704194, |
|
"loss": 0.4417, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00017106806583781534, |
|
"loss": 0.4472, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00017096614085372185, |
|
"loss": 0.4442, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00017086406712839072, |
|
"loss": 0.4437, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.0001707618448757631, |
|
"loss": 0.4473, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00017065947431009126, |
|
"loss": 0.4435, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00017055695564593853, |
|
"loss": 0.4458, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0001704542890981785, |
|
"loss": 0.4442, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.00017035147488199482, |
|
"loss": 0.4444, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.00017024851321288052, |
|
"loss": 0.4455, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0001701454043066378, |
|
"loss": 0.4467, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.00017004214837937738, |
|
"loss": 0.4439, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00016993874564751822, |
|
"loss": 0.4376, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00016983519632778686, |
|
"loss": 0.4408, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_loss": 0.4704599976539612, |
|
"eval_runtime": 28.2725, |
|
"eval_samples_per_second": 7.074, |
|
"eval_steps_per_second": 0.884, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00016973150063721718, |
|
"loss": 0.4355, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0001696276587931498, |
|
"loss": 0.4467, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.00016952367101323162, |
|
"loss": 0.438, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00016941953751541553, |
|
"loss": 0.4454, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00016931525851795977, |
|
"loss": 0.4458, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0001692108342394275, |
|
"loss": 0.4359, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00016910626489868649, |
|
"loss": 0.4477, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00016900155071490844, |
|
"loss": 0.4363, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.00016889669190756868, |
|
"loss": 0.448, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.00016879168869644566, |
|
"loss": 0.4406, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.00016868654130162056, |
|
"loss": 0.446, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.00016858124994347665, |
|
"loss": 0.4448, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.00016847581484269897, |
|
"loss": 0.4432, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.00016837023622027388, |
|
"loss": 0.4376, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00016826451429748852, |
|
"loss": 0.4498, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00016815864929593043, |
|
"loss": 0.4434, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00016805264143748694, |
|
"loss": 0.4466, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00016794649094434486, |
|
"loss": 0.4378, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00016784019803899, |
|
"loss": 0.4401, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00016773376294420652, |
|
"loss": 0.454, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00016762718588307678, |
|
"loss": 0.4412, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00016752046707898055, |
|
"loss": 0.443, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00016741360675559473, |
|
"loss": 0.429, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.00016730660513689292, |
|
"loss": 0.4325, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0001671994624471447, |
|
"loss": 0.4462, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00016709217891091547, |
|
"loss": 0.4417, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00016698475475306578, |
|
"loss": 0.4397, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.00016687719019875088, |
|
"loss": 0.4433, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0001667694854734204, |
|
"loss": 0.4382, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00016666164080281765, |
|
"loss": 0.4325, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00016655365641297929, |
|
"loss": 0.4425, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00016644553253023484, |
|
"loss": 0.439, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00016633726938120616, |
|
"loss": 0.4433, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00016622886719280705, |
|
"loss": 0.4399, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0001661203261922427, |
|
"loss": 0.4387, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.00016601164660700918, |
|
"loss": 0.438, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00016590282866489319, |
|
"loss": 0.4402, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00016579387259397127, |
|
"loss": 0.4404, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.0001656847786226095, |
|
"loss": 0.4344, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00016557554697946308, |
|
"loss": 0.4362, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0001654661778934756, |
|
"loss": 0.4334, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0001653566715938789, |
|
"loss": 0.4401, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00016524702831019228, |
|
"loss": 0.4393, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00016513724827222227, |
|
"loss": 0.4356, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00016502733171006183, |
|
"loss": 0.4411, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0001649172788540903, |
|
"loss": 0.4404, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0001648070899349726, |
|
"loss": 0.4381, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.00016469676518365874, |
|
"loss": 0.4444, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.00016458630483138356, |
|
"loss": 0.4369, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00016447570910966603, |
|
"loss": 0.4361, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00016436497825030884, |
|
"loss": 0.4278, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00016425411248539805, |
|
"loss": 0.4392, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00016414311204730227, |
|
"loss": 0.438, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0001640319771686725, |
|
"loss": 0.4361, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.00016392070808244155, |
|
"loss": 0.4302, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.00016380930502182345, |
|
"loss": 0.4342, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0001636977682203131, |
|
"loss": 0.4379, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0001635860979116856, |
|
"loss": 0.433, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00016347429432999602, |
|
"loss": 0.4451, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00016336235770957863, |
|
"loss": 0.4356, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00016325028828504662, |
|
"loss": 0.4348, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00016313808629129156, |
|
"loss": 0.4389, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0001630257519634828, |
|
"loss": 0.4287, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00016291328553706704, |
|
"loss": 0.4381, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00016280068724776797, |
|
"loss": 0.439, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00016268795733158552, |
|
"loss": 0.4384, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00016257509602479563, |
|
"loss": 0.4389, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00016246210356394953, |
|
"loss": 0.4391, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00016234898018587337, |
|
"loss": 0.4393, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00016223572612766773, |
|
"loss": 0.428, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00016212234162670704, |
|
"loss": 0.4375, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00016200882692063917, |
|
"loss": 0.4392, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00016189518224738486, |
|
"loss": 0.4342, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0001617814078451373, |
|
"loss": 0.4408, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0001616675039523615, |
|
"loss": 0.4375, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.000161553470807794, |
|
"loss": 0.4405, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00016143930865044213, |
|
"loss": 0.4301, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.00016132501771958367, |
|
"loss": 0.432, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0001612105982547663, |
|
"loss": 0.4318, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0001610960504958071, |
|
"loss": 0.4351, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0001609813746827921, |
|
"loss": 0.4364, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00016086657105607562, |
|
"loss": 0.4346, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.00016075163985627993, |
|
"loss": 0.4317, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.0001606365813242947, |
|
"loss": 0.4321, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.0001605213957012764, |
|
"loss": 0.4314, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.00016040608322864808, |
|
"loss": 0.4389, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00016029064414809838, |
|
"loss": 0.4367, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00016017507870158147, |
|
"loss": 0.4361, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00016005938713131642, |
|
"loss": 0.4345, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.0001599435696797865, |
|
"loss": 0.4337, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.00015982762658973894, |
|
"loss": 0.4413, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.00015971155810418422, |
|
"loss": 0.4385, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0001595953644663957, |
|
"loss": 0.4299, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00015947904591990907, |
|
"loss": 0.4294, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00015936260270852173, |
|
"loss": 0.4328, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.00015924603507629244, |
|
"loss": 0.4281, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0001591293432675407, |
|
"loss": 0.4418, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.0001590125275268464, |
|
"loss": 0.4322, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.00015889558809904902, |
|
"loss": 0.4346, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 0.4282, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.00015866133916279886, |
|
"loss": 0.4389, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.00015854403014531937, |
|
"loss": 0.4303, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0001584265984226823, |
|
"loss": 0.4343, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0001583090442410182, |
|
"loss": 0.4348, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0001581913678467144, |
|
"loss": 0.4332, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0001580735694864143, |
|
"loss": 0.4269, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.000157955649407017, |
|
"loss": 0.4343, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00015783760785567665, |
|
"loss": 0.4393, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.00015771944507980207, |
|
"loss": 0.4323, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0001576011613270561, |
|
"loss": 0.4332, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00015748275684535515, |
|
"loss": 0.4323, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0001573642318828687, |
|
"loss": 0.4292, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.00015724558668801875, |
|
"loss": 0.4303, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.00015712682150947923, |
|
"loss": 0.4311, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.00015700793659617567, |
|
"loss": 0.4302, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.00015688893219728445, |
|
"loss": 0.4311, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.00015676980856223248, |
|
"loss": 0.4349, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.00015665056594069647, |
|
"loss": 0.4277, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.00015653120458260263, |
|
"loss": 0.4334, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.00015641172473812592, |
|
"loss": 0.4297, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.00015629212665768978, |
|
"loss": 0.4328, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.00015617241059196534, |
|
"loss": 0.4362, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.00015605257679187113, |
|
"loss": 0.4316, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.00015593262550857233, |
|
"loss": 0.4365, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.00015581255699348046, |
|
"loss": 0.4316, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00015569237149825265, |
|
"loss": 0.4273, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00015557206927479137, |
|
"loss": 0.4296, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.00015545165057524358, |
|
"loss": 0.4362, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.00015533111565200044, |
|
"loss": 0.4323, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.00015521046475769674, |
|
"loss": 0.4344, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.00015508969814521025, |
|
"loss": 0.4291, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0001549688160676614, |
|
"loss": 0.4317, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.0001548478187784125, |
|
"loss": 0.4304, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.00015472670653106745, |
|
"loss": 0.4301, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.00015460547957947104, |
|
"loss": 0.4316, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.00015448413817770846, |
|
"loss": 0.4277, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00015436268258010478, |
|
"loss": 0.4234, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00015424111304122447, |
|
"loss": 0.4266, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 0.46920087933540344, |
|
"eval_runtime": 28.2397, |
|
"eval_samples_per_second": 7.082, |
|
"eval_steps_per_second": 0.885, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0001541194298158708, |
|
"loss": 0.4297, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00015399763315908528, |
|
"loss": 0.4263, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.0001538757233261472, |
|
"loss": 0.4264, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00015375370057257306, |
|
"loss": 0.4329, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00015363156515411602, |
|
"loss": 0.4166, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0001535093173267654, |
|
"loss": 0.4295, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.00015338695734674605, |
|
"loss": 0.4378, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00015326448547051805, |
|
"loss": 0.4306, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00015314190195477584, |
|
"loss": 0.4321, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00015301920705644792, |
|
"loss": 0.4264, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00015289640103269625, |
|
"loss": 0.4284, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00015277348414091568, |
|
"loss": 0.434, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0001526504566387335, |
|
"loss": 0.4311, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.00015252731878400864, |
|
"loss": 0.4269, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00015240407083483163, |
|
"loss": 0.4259, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0001522807130495235, |
|
"loss": 0.4341, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00015215724568663555, |
|
"loss": 0.4285, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0001520336690049488, |
|
"loss": 0.4284, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0001519099832634734, |
|
"loss": 0.4388, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.00015178618872144797, |
|
"loss": 0.4327, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.00015166228563833934, |
|
"loss": 0.4287, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00015153827427384173, |
|
"loss": 0.4297, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00015141415488787626, |
|
"loss": 0.4266, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.00015128992774059063, |
|
"loss": 0.4245, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.00015116559309235825, |
|
"loss": 0.4289, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00015104115120377783, |
|
"loss": 0.4219, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00015091660233567305, |
|
"loss": 0.4281, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.0001507919467490916, |
|
"loss": 0.4241, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00015066718470530495, |
|
"loss": 0.4243, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00015054231646580764, |
|
"loss": 0.4218, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00015041734229231688, |
|
"loss": 0.4294, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.00015029226244677178, |
|
"loss": 0.4273, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0001501670771913331, |
|
"loss": 0.4258, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0001500417867883824, |
|
"loss": 0.4278, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0001499163915005216, |
|
"loss": 0.4312, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00014979089159057265, |
|
"loss": 0.4278, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.00014966528732157658, |
|
"loss": 0.422, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0001495395789567932, |
|
"loss": 0.4234, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0001494137667597006, |
|
"loss": 0.4192, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.00014928785099399433, |
|
"loss": 0.4199, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00014916183192358718, |
|
"loss": 0.4263, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00014903570981260832, |
|
"loss": 0.4266, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.000148909484925403, |
|
"loss": 0.4282, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00014878315752653185, |
|
"loss": 0.429, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00014865672788077027, |
|
"loss": 0.4195, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00014853019625310813, |
|
"loss": 0.4295, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00014840356290874888, |
|
"loss": 0.4232, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00014827682811310928, |
|
"loss": 0.4287, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00014814999213181866, |
|
"loss": 0.4249, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.00014802305523071852, |
|
"loss": 0.425, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.00014789601767586173, |
|
"loss": 0.4169, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0001477688797335123, |
|
"loss": 0.4206, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00014764164167014451, |
|
"loss": 0.4189, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.00014751430375244256, |
|
"loss": 0.4256, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.00014738686624729986, |
|
"loss": 0.4245, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00014725932942181872, |
|
"loss": 0.4244, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.0001471316935433094, |
|
"loss": 0.4208, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00014700395887928995, |
|
"loss": 0.4229, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00014687612569748535, |
|
"loss": 0.4257, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00014674819426582712, |
|
"loss": 0.4261, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00014662016485245274, |
|
"loss": 0.4214, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.000146492037725705, |
|
"loss": 0.4241, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0001463638131541315, |
|
"loss": 0.4305, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.00014623549140648412, |
|
"loss": 0.4208, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.0001461070727517183, |
|
"loss": 0.4258, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00014597855745899274, |
|
"loss": 0.4193, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00014584994579766865, |
|
"loss": 0.4285, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0001457212380373091, |
|
"loss": 0.4204, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00014559243444767878, |
|
"loss": 0.423, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0001454635352987431, |
|
"loss": 0.4253, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00014533454086066772, |
|
"loss": 0.4238, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00014520545140381816, |
|
"loss": 0.419, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00014507626719875897, |
|
"loss": 0.4245, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0001449469885162534, |
|
"loss": 0.4204, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00014481761562726262, |
|
"loss": 0.4262, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.0001446881488029453, |
|
"loss": 0.4267, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.00014455858831465695, |
|
"loss": 0.4235, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.00014442893443394945, |
|
"loss": 0.4197, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.00014429918743257044, |
|
"loss": 0.4216, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0001441693475824626, |
|
"loss": 0.4222, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.00014403941515576344, |
|
"loss": 0.4184, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.00014390939042480428, |
|
"loss": 0.4194, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.0001437792736621101, |
|
"loss": 0.4213, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.0001436490651403986, |
|
"loss": 0.4188, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00014351876513257986, |
|
"loss": 0.4192, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00014338837391175582, |
|
"loss": 0.4215, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00014325789175121946, |
|
"loss": 0.4198, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00014312731892445442, |
|
"loss": 0.4232, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00014299665570513437, |
|
"loss": 0.4246, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00014286590236712237, |
|
"loss": 0.4185, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00014273505918447054, |
|
"loss": 0.4214, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0001426041264314191, |
|
"loss": 0.418, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0001424731043823962, |
|
"loss": 0.4235, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.00014234199331201696, |
|
"loss": 0.424, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0001422107934950832, |
|
"loss": 0.4219, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.00014207950520658274, |
|
"loss": 0.4266, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0001419481287216888, |
|
"loss": 0.4193, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00014181666431575945, |
|
"loss": 0.4168, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0001416851122643371, |
|
"loss": 0.4196, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.0001415534728431478, |
|
"loss": 0.4196, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.00014142174632810072, |
|
"loss": 0.4205, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00014128993299528762, |
|
"loss": 0.4248, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0001411580331209822, |
|
"loss": 0.4161, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00014102604698163951, |
|
"loss": 0.4191, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00014089397485389548, |
|
"loss": 0.4233, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00014076181701456623, |
|
"loss": 0.4188, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00014062957374064752, |
|
"loss": 0.4274, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00014049724530931416, |
|
"loss": 0.4202, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00014036483199791948, |
|
"loss": 0.4164, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00014023233408399472, |
|
"loss": 0.4226, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0001400997518452484, |
|
"loss": 0.4134, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.00013996708555956582, |
|
"loss": 0.4217, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0001398343355050084, |
|
"loss": 0.4215, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.00013970150195981323, |
|
"loss": 0.4138, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00013956858520239223, |
|
"loss": 0.4121, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00013943558551133186, |
|
"loss": 0.416, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.00013930250316539238, |
|
"loss": 0.4203, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0001391693384435072, |
|
"loss": 0.4236, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0001390360916247826, |
|
"loss": 0.4179, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0001389027629884966, |
|
"loss": 0.4136, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00013876935281409907, |
|
"loss": 0.4257, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001386358613812105, |
|
"loss": 0.4234, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001385022889696218, |
|
"loss": 0.4177, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00013836863585929365, |
|
"loss": 0.4185, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0001382349023303558, |
|
"loss": 0.4192, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.0001381010886631066, |
|
"loss": 0.4187, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.00013796719513801232, |
|
"loss": 0.4197, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0001378332220357066, |
|
"loss": 0.4184, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00013769916963698997, |
|
"loss": 0.4203, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.000137565038222829, |
|
"loss": 0.4198, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00013743082807435615, |
|
"loss": 0.4211, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00013729653947286847, |
|
"loss": 0.4132, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00013716217269982788, |
|
"loss": 0.4168, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00013702772803685984, |
|
"loss": 0.417, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00013689320576575322, |
|
"loss": 0.4247, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00013675860616845954, |
|
"loss": 0.4186, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00013662392952709228, |
|
"loss": 0.4116, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0001364891761239266, |
|
"loss": 0.4087, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_loss": 0.4685542583465576, |
|
"eval_runtime": 28.2711, |
|
"eval_samples_per_second": 7.074, |
|
"eval_steps_per_second": 0.884, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00013635434624139828, |
|
"loss": 0.4133, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00013621944016210366, |
|
"loss": 0.4181, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00013608445816879866, |
|
"loss": 0.414, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.00013594940054439825, |
|
"loss": 0.4165, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.00013581426757197605, |
|
"loss": 0.4153, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00013567905953476355, |
|
"loss": 0.4218, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00013554377671614958, |
|
"loss": 0.4177, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00013540841939967962, |
|
"loss": 0.4198, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00013527298786905544, |
|
"loss": 0.4194, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.0001351374824081343, |
|
"loss": 0.4157, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0001350019033009283, |
|
"loss": 0.4109, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00013486625083160414, |
|
"loss": 0.4195, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00013473052528448201, |
|
"loss": 0.416, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00013459472694403552, |
|
"loss": 0.4198, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00013445885609489068, |
|
"loss": 0.4155, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.0001343229130218255, |
|
"loss": 0.4221, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00013418689800976942, |
|
"loss": 0.419, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00013405081134380264, |
|
"loss": 0.4186, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00013391465330915556, |
|
"loss": 0.4148, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00013377842419120807, |
|
"loss": 0.4227, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00013364212427548916, |
|
"loss": 0.4155, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0001335057538476762, |
|
"loss": 0.4139, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00013336931319359426, |
|
"loss": 0.4136, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00013323280259921574, |
|
"loss": 0.4168, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00013309622235065942, |
|
"loss": 0.4132, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0001329595727341903, |
|
"loss": 0.4124, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00013282285403621864, |
|
"loss": 0.414, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0001326860665432995, |
|
"loss": 0.4164, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00013254921054213224, |
|
"loss": 0.41, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00013241228631955963, |
|
"loss": 0.41, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00013227529416256754, |
|
"loss": 0.4124, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0001321382343582842, |
|
"loss": 0.4134, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00013200110719397968, |
|
"loss": 0.4162, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00013186391295706517, |
|
"loss": 0.4216, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.00013172665193509242, |
|
"loss": 0.412, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.00013158932441575326, |
|
"loss": 0.4089, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00013145193068687876, |
|
"loss": 0.4132, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00013131447103643885, |
|
"loss": 0.4075, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00013117694575254162, |
|
"loss": 0.4165, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00013103935512343273, |
|
"loss": 0.4139, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.4095, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00013076397898324665, |
|
"loss": 0.416, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00013062619404934317, |
|
"loss": 0.4207, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00013048834492457415, |
|
"loss": 0.4042, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00013035043189786393, |
|
"loss": 0.4194, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00013021245525827096, |
|
"loss": 0.4159, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00013007441529498673, |
|
"loss": 0.4138, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00012993631229733582, |
|
"loss": 0.4129, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.0001297981465547746, |
|
"loss": 0.4063, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.0001296599183568911, |
|
"loss": 0.4112, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.00012952162799340425, |
|
"loss": 0.4066, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00012938327575416327, |
|
"loss": 0.4118, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00012924486192914705, |
|
"loss": 0.4091, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00012910638680846358, |
|
"loss": 0.4155, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00012896785068234926, |
|
"loss": 0.4133, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00012882925384116842, |
|
"loss": 0.4117, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0001286905965754127, |
|
"loss": 0.4142, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0001285518791757002, |
|
"loss": 0.409, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00012841310193277528, |
|
"loss": 0.409, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0001282742651375076, |
|
"loss": 0.4124, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00012813536908089164, |
|
"loss": 0.4135, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.0001279964140540461, |
|
"loss": 0.4045, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00012785740034821329, |
|
"loss": 0.4134, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00012771832825475852, |
|
"loss": 0.4157, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0001275791980651695, |
|
"loss": 0.4143, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0001274400100710556, |
|
"loss": 0.4146, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00012730076456414746, |
|
"loss": 0.411, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.0001271614618362962, |
|
"loss": 0.4131, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00012702210217947288, |
|
"loss": 0.4162, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0001268826858857679, |
|
"loss": 0.4099, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00012674321324739038, |
|
"loss": 0.4135, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00012660368455666752, |
|
"loss": 0.4106, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00012646410010604397, |
|
"loss": 0.4027, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00012632446018808128, |
|
"loss": 0.4118, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00012618476509545725, |
|
"loss": 0.4143, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.00012604501512096533, |
|
"loss": 0.414, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.000125905210557514, |
|
"loss": 0.4064, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00012576535169812615, |
|
"loss": 0.4188, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00012562543883593848, |
|
"loss": 0.4048, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00012548547226420089, |
|
"loss": 0.4094, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0001253454522762758, |
|
"loss": 0.4055, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00012520537916563756, |
|
"loss": 0.4097, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00012506525322587207, |
|
"loss": 0.4064, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.0001249250747506757, |
|
"loss": 0.4076, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00012478484403385506, |
|
"loss": 0.4081, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00012464456136932625, |
|
"loss": 0.4122, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0001245042270511142, |
|
"loss": 0.4073, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0001243638413733522, |
|
"loss": 0.4098, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00012422340463028107, |
|
"loss": 0.413, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00012408291711624877, |
|
"loss": 0.4101, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00012394237912570957, |
|
"loss": 0.4128, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00012380179095322364, |
|
"loss": 0.4062, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0001236611528934562, |
|
"loss": 0.4119, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.00012352046524117716, |
|
"loss": 0.4093, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0001233797282912603, |
|
"loss": 0.4072, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00012323894233868274, |
|
"loss": 0.4048, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00012309810767852433, |
|
"loss": 0.4117, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00012295722460596697, |
|
"loss": 0.4123, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0001228162934162941, |
|
"loss": 0.4127, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00012267531440488986, |
|
"loss": 0.4106, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00012253428786723877, |
|
"loss": 0.4078, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00012239321409892494, |
|
"loss": 0.4111, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00012225209339563145, |
|
"loss": 0.4123, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00012211092605313972, |
|
"loss": 0.4049, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00012196971236732894, |
|
"loss": 0.406, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00012182845263417549, |
|
"loss": 0.4085, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00012168714714975218, |
|
"loss": 0.407, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00012154579621022777, |
|
"loss": 0.413, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.0001214044001118663, |
|
"loss": 0.3998, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00012126295915102639, |
|
"loss": 0.4042, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00012112147362416076, |
|
"loss": 0.4118, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00012097994382781547, |
|
"loss": 0.4091, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00012083837005862946, |
|
"loss": 0.4009, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00012069675261333375, |
|
"loss": 0.4124, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.00012055509178875097, |
|
"loss": 0.4041, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0001204133878817946, |
|
"loss": 0.4029, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00012027164118946844, |
|
"loss": 0.4104, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00012012985200886602, |
|
"loss": 0.4065, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00011998802063716987, |
|
"loss": 0.4074, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00011984614737165094, |
|
"loss": 0.4077, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00011970423250966807, |
|
"loss": 0.4136, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00011956227634866714, |
|
"loss": 0.4124, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00011942027918618074, |
|
"loss": 0.4058, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00011927824131982734, |
|
"loss": 0.4066, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00011913616304731063, |
|
"loss": 0.4091, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.0001189940446664192, |
|
"loss": 0.41, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00011885188647502546, |
|
"loss": 0.3986, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.00011870968877108546, |
|
"loss": 0.4116, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.00011856745185263791, |
|
"loss": 0.4157, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00011842517601780388, |
|
"loss": 0.407, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00011828286156478585, |
|
"loss": 0.4098, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00011814050879186731, |
|
"loss": 0.4045, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0001179981179974121, |
|
"loss": 0.4067, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00011785568947986367, |
|
"loss": 0.4101, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.0001177132235377446, |
|
"loss": 0.4046, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00011757072046965589, |
|
"loss": 0.4024, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00011742818057427636, |
|
"loss": 0.3982, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00011728560415036201, |
|
"loss": 0.399, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"eval_loss": 0.47236838936805725, |
|
"eval_runtime": 28.2487, |
|
"eval_samples_per_second": 7.08, |
|
"eval_steps_per_second": 0.885, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00011714299149674537, |
|
"loss": 0.402, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.00011700034291233499, |
|
"loss": 0.4055, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.00011685765869611463, |
|
"loss": 0.4065, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00011671493914714279, |
|
"loss": 0.4112, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00011657218456455206, |
|
"loss": 0.4032, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00011642939524754832, |
|
"loss": 0.404, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00011628657149541045, |
|
"loss": 0.402, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00011614371360748935, |
|
"loss": 0.407, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00011600082188320752, |
|
"loss": 0.4095, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00011585789662205835, |
|
"loss": 0.4102, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00011571493812360561, |
|
"loss": 0.407, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00011557194668748262, |
|
"loss": 0.4038, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.00011542892261339178, |
|
"loss": 0.4073, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.00011528586620110396, |
|
"loss": 0.3989, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00011514277775045768, |
|
"loss": 0.4077, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00011499965756135873, |
|
"loss": 0.404, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.0001148565059337794, |
|
"loss": 0.4096, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00011471332316775773, |
|
"loss": 0.4091, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.0001145701095633973, |
|
"loss": 0.4082, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.00011442686542086609, |
|
"loss": 0.4112, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00011428359104039617, |
|
"loss": 0.404, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00011414028672228293, |
|
"loss": 0.4059, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00011399695276688469, |
|
"loss": 0.3987, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00011385358947462166, |
|
"loss": 0.4035, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00011371019714597562, |
|
"loss": 0.4009, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00011356677608148933, |
|
"loss": 0.4082, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00011342332658176555, |
|
"loss": 0.4025, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.00011327984894746686, |
|
"loss": 0.4076, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.00011313634347931466, |
|
"loss": 0.4026, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00011299281047808877, |
|
"loss": 0.4028, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00011284925024462665, |
|
"loss": 0.4021, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00011270566307982291, |
|
"loss": 0.4026, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00011256204928462857, |
|
"loss": 0.3941, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00011241840916005043, |
|
"loss": 0.4095, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00011227474300715055, |
|
"loss": 0.4073, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.0001121310511270455, |
|
"loss": 0.3998, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.00011198733382090576, |
|
"loss": 0.3974, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.00011184359138995517, |
|
"loss": 0.4039, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.00011169982413547012, |
|
"loss": 0.401, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.00011155603235877912, |
|
"loss": 0.3985, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.00011141221636126202, |
|
"loss": 0.3997, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.00011126837644434953, |
|
"loss": 0.4027, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00011112451290952237, |
|
"loss": 0.4002, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00011098062605831084, |
|
"loss": 0.4048, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00011083671619229408, |
|
"loss": 0.4022, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00011069278361309945, |
|
"loss": 0.3989, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00011054882862240199, |
|
"loss": 0.4082, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00011040485152192363, |
|
"loss": 0.3989, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00011026085261343271, |
|
"loss": 0.4064, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.00011011683219874323, |
|
"loss": 0.3947, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.00010997279057971425, |
|
"loss": 0.3972, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.00010982872805824936, |
|
"loss": 0.4013, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.00010968464493629584, |
|
"loss": 0.3996, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00010954054151584425, |
|
"loss": 0.404, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00010939641809892767, |
|
"loss": 0.4021, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00010925227498762106, |
|
"loss": 0.402, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00010910811248404065, |
|
"loss": 0.398, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00010896393089034336, |
|
"loss": 0.4011, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00010881973050872612, |
|
"loss": 0.4004, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.0001086755116414252, |
|
"loss": 0.3949, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.00010853127459071567, |
|
"loss": 0.4068, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.00010838701965891063, |
|
"loss": 0.4015, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00010824274714836073, |
|
"loss": 0.3982, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00010809845736145346, |
|
"loss": 0.401, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00010795415060061243, |
|
"loss": 0.4076, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00010780982716829698, |
|
"loss": 0.4029, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00010766548736700124, |
|
"loss": 0.3954, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00010752113149925378, |
|
"loss": 0.4047, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.00010737675986761677, |
|
"loss": 0.3969, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.00010723237277468538, |
|
"loss": 0.4052, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0001070879705230873, |
|
"loss": 0.4001, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00010694355341548188, |
|
"loss": 0.4037, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0001067991217545597, |
|
"loss": 0.3952, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00010665467584304178, |
|
"loss": 0.4021, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00010651021598367906, |
|
"loss": 0.3973, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00010636574247925161, |
|
"loss": 0.3979, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00010622125563256821, |
|
"loss": 0.4043, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.0001060767557464656, |
|
"loss": 0.4024, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.00010593224312380776, |
|
"loss": 0.398, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00010578771806748546, |
|
"loss": 0.4003, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00010564318088041551, |
|
"loss": 0.3953, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0001054986318655401, |
|
"loss": 0.3937, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.00010535407132582622, |
|
"loss": 0.4051, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00010520949956426505, |
|
"loss": 0.398, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00010506491688387127, |
|
"loss": 0.3946, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00010492032358768247, |
|
"loss": 0.3993, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00010477571997875848, |
|
"loss": 0.397, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00010463110636018065, |
|
"loss": 0.3992, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.00010448648303505151, |
|
"loss": 0.4015, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.00010434185030649372, |
|
"loss": 0.3984, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.0001041972084776498, |
|
"loss": 0.4009, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00010405255785168131, |
|
"loss": 0.4009, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.00010390789873176818, |
|
"loss": 0.401, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0001037632314211082, |
|
"loss": 0.3966, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00010361855622291637, |
|
"loss": 0.3975, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00010347387344042408, |
|
"loss": 0.3994, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00010332918337687879, |
|
"loss": 0.3993, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00010318448633554308, |
|
"loss": 0.3985, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.0001030397826196943, |
|
"loss": 0.3957, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.00010289507253262358, |
|
"loss": 0.4014, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.00010275035637763563, |
|
"loss": 0.3962, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.00010260563445804776, |
|
"loss": 0.3978, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.0001024609070771893, |
|
"loss": 0.3981, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00010231617453840119, |
|
"loss": 0.4024, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00010217143714503508, |
|
"loss": 0.3965, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00010202669520045278, |
|
"loss": 0.3952, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00010188194900802566, |
|
"loss": 0.401, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.00010173719887113402, |
|
"loss": 0.3989, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.00010159244509316644, |
|
"loss": 0.3986, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.00010144768797751905, |
|
"loss": 0.3936, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00010130292782759507, |
|
"loss": 0.3933, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00010115816494680399, |
|
"loss": 0.3998, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00010101339963856111, |
|
"loss": 0.3937, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00010086863220628675, |
|
"loss": 0.3925, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00010072386295340572, |
|
"loss": 0.399, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00010057909218334665, |
|
"loss": 0.4014, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00010043432019954131, |
|
"loss": 0.3977, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00010028954730542406, |
|
"loss": 0.401, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.0001001447738044311, |
|
"loss": 0.3986, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3989, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 9.985522619556893e-05, |
|
"loss": 0.3977, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 9.971045269457598e-05, |
|
"loss": 0.3971, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 9.956567980045872e-05, |
|
"loss": 0.3948, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 9.942090781665336e-05, |
|
"loss": 0.402, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 9.927613704659429e-05, |
|
"loss": 0.3976, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 9.913136779371326e-05, |
|
"loss": 0.395, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 9.898660036143893e-05, |
|
"loss": 0.3977, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 9.884183505319604e-05, |
|
"loss": 0.3967, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 9.869707217240497e-05, |
|
"loss": 0.3939, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 9.855231202248097e-05, |
|
"loss": 0.4004, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 9.840755490683357e-05, |
|
"loss": 0.4001, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.8262801128866e-05, |
|
"loss": 0.3912, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.811805099197438e-05, |
|
"loss": 0.397, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.797330479954725e-05, |
|
"loss": 0.3939, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 9.782856285496495e-05, |
|
"loss": 0.4022, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 9.76838254615988e-05, |
|
"loss": 0.3908, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 9.75390929228107e-05, |
|
"loss": 0.3905, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 9.739436554195227e-05, |
|
"loss": 0.3875, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 0.47556066513061523, |
|
"eval_runtime": 28.1986, |
|
"eval_samples_per_second": 7.093, |
|
"eval_steps_per_second": 0.887, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.72496436223644e-05, |
|
"loss": 0.3972, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.710492746737643e-05, |
|
"loss": 0.3897, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.696021738030575e-05, |
|
"loss": 0.3939, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.681551366445694e-05, |
|
"loss": 0.3945, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.667081662312124e-05, |
|
"loss": 0.3993, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.652612655957596e-05, |
|
"loss": 0.3915, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.638144377708367e-05, |
|
"loss": 0.3993, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.62367685788918e-05, |
|
"loss": 0.3994, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.609210126823185e-05, |
|
"loss": 0.3963, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 9.59474421483187e-05, |
|
"loss": 0.3897, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 9.580279152235019e-05, |
|
"loss": 0.3982, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.565814969350629e-05, |
|
"loss": 0.3981, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.551351696494854e-05, |
|
"loss": 0.394, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.536889363981936e-05, |
|
"loss": 0.4008, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 9.522428002124157e-05, |
|
"loss": 0.3981, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 9.507967641231756e-05, |
|
"loss": 0.3999, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 9.493508311612874e-05, |
|
"loss": 0.3974, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 9.479050043573497e-05, |
|
"loss": 0.4002, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 9.46459286741738e-05, |
|
"loss": 0.3947, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 9.450136813445994e-05, |
|
"loss": 0.3945, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.43568191195845e-05, |
|
"loss": 0.3955, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.421228193251452e-05, |
|
"loss": 0.3937, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 9.406775687619223e-05, |
|
"loss": 0.3938, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 9.39232442535344e-05, |
|
"loss": 0.3895, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 9.377874436743184e-05, |
|
"loss": 0.394, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.363425752074844e-05, |
|
"loss": 0.3909, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.348978401632101e-05, |
|
"loss": 0.397, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 9.334532415695824e-05, |
|
"loss": 0.3906, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 9.320087824544031e-05, |
|
"loss": 0.3944, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 9.305644658451813e-05, |
|
"loss": 0.3941, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 9.291202947691271e-05, |
|
"loss": 0.3929, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.276762722531463e-05, |
|
"loss": 0.3929, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.262324013238325e-05, |
|
"loss": 0.3959, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 9.247886850074622e-05, |
|
"loss": 0.3941, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 9.233451263299875e-05, |
|
"loss": 0.3916, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 9.219017283170302e-05, |
|
"loss": 0.3891, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 9.204584939938762e-05, |
|
"loss": 0.3934, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 9.19015426385466e-05, |
|
"loss": 0.3918, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.175725285163932e-05, |
|
"loss": 0.3883, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.161298034108941e-05, |
|
"loss": 0.3926, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 9.146872540928437e-05, |
|
"loss": 0.3964, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 9.132448835857483e-05, |
|
"loss": 0.3939, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 9.118026949127389e-05, |
|
"loss": 0.3945, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 9.103606910965666e-05, |
|
"loss": 0.3953, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 9.089188751595936e-05, |
|
"loss": 0.3906, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 9.074772501237897e-05, |
|
"loss": 0.3933, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 9.060358190107234e-05, |
|
"loss": 0.3933, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 9.045945848415573e-05, |
|
"loss": 0.3858, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 9.031535506370417e-05, |
|
"loss": 0.3883, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 9.017127194175068e-05, |
|
"loss": 0.3885, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 9.002720942028577e-05, |
|
"loss": 0.3952, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 8.98831678012568e-05, |
|
"loss": 0.3939, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 8.97391473865673e-05, |
|
"loss": 0.3893, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 8.959514847807639e-05, |
|
"loss": 0.3895, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 8.945117137759802e-05, |
|
"loss": 0.392, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 8.930721638690056e-05, |
|
"loss": 0.385, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 8.916328380770595e-05, |
|
"loss": 0.3933, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.901937394168917e-05, |
|
"loss": 0.3954, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.887548709047764e-05, |
|
"loss": 0.3983, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 8.873162355565046e-05, |
|
"loss": 0.3864, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 8.858778363873796e-05, |
|
"loss": 0.386, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 8.844396764122093e-05, |
|
"loss": 0.3981, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 8.830017586452993e-05, |
|
"loss": 0.3939, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 8.815640861004488e-05, |
|
"loss": 0.3939, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.801266617909427e-05, |
|
"loss": 0.3859, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.786894887295451e-05, |
|
"loss": 0.3954, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 8.772525699284946e-05, |
|
"loss": 0.3909, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 8.75815908399496e-05, |
|
"loss": 0.3878, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 8.743795071537146e-05, |
|
"loss": 0.3881, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 8.729433692017711e-05, |
|
"loss": 0.3908, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 8.715074975537338e-05, |
|
"loss": 0.3899, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 8.700718952191124e-05, |
|
"loss": 0.397, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.686365652068535e-05, |
|
"loss": 0.3852, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.672015105253319e-05, |
|
"loss": 0.3877, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.657667341823448e-05, |
|
"loss": 0.393, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 8.643322391851072e-05, |
|
"loss": 0.3941, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 8.628980285402439e-05, |
|
"loss": 0.3949, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 8.614641052537838e-05, |
|
"loss": 0.3884, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 8.600304723311534e-05, |
|
"loss": 0.3872, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 8.585971327771707e-05, |
|
"loss": 0.3789, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 8.571640895960387e-05, |
|
"loss": 0.3925, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.557313457913394e-05, |
|
"loss": 0.3883, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.54298904366027e-05, |
|
"loss": 0.3875, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.528667683224225e-05, |
|
"loss": 0.3867, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.514349406622064e-05, |
|
"loss": 0.3839, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.50003424386413e-05, |
|
"loss": 0.393, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.485722224954237e-05, |
|
"loss": 0.3953, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.471413379889609e-05, |
|
"loss": 0.3903, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.457107738660826e-05, |
|
"loss": 0.3897, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.44280533125174e-05, |
|
"loss": 0.3942, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 8.428506187639443e-05, |
|
"loss": 0.3957, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 8.414210337794166e-05, |
|
"loss": 0.3908, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 8.39991781167925e-05, |
|
"loss": 0.3829, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 8.385628639251066e-05, |
|
"loss": 0.3885, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 8.371342850458955e-05, |
|
"loss": 0.388, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 8.357060475245166e-05, |
|
"loss": 0.3863, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 8.342781543544798e-05, |
|
"loss": 0.3897, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 8.328506085285724e-05, |
|
"loss": 0.389, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 8.31423413038854e-05, |
|
"loss": 0.3897, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 8.299965708766505e-05, |
|
"loss": 0.3896, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 8.285700850325467e-05, |
|
"loss": 0.3903, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 8.271439584963802e-05, |
|
"loss": 0.392, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 8.257181942572365e-05, |
|
"loss": 0.3932, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 8.242927953034412e-05, |
|
"loss": 0.3879, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 8.22867764622554e-05, |
|
"loss": 0.3923, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 8.214431052013634e-05, |
|
"loss": 0.3884, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 8.200188200258791e-05, |
|
"loss": 0.3855, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 8.18594912081327e-05, |
|
"loss": 0.3859, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 8.171713843521418e-05, |
|
"loss": 0.3871, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 8.157482398219613e-05, |
|
"loss": 0.3807, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 8.143254814736211e-05, |
|
"loss": 0.3863, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 8.129031122891459e-05, |
|
"loss": 0.3901, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 8.114811352497458e-05, |
|
"loss": 0.3927, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 8.100595533358084e-05, |
|
"loss": 0.3809, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 8.086383695268938e-05, |
|
"loss": 0.3854, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 8.072175868017268e-05, |
|
"loss": 0.3926, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 8.057972081381927e-05, |
|
"loss": 0.3955, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 8.043772365133287e-05, |
|
"loss": 0.388, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 8.029576749033194e-05, |
|
"loss": 0.3881, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 8.015385262834906e-05, |
|
"loss": 0.3888, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 8.001197936283014e-05, |
|
"loss": 0.3851, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.987014799113397e-05, |
|
"loss": 0.3905, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.972835881053159e-05, |
|
"loss": 0.3913, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.958661211820545e-05, |
|
"loss": 0.3884, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.944490821124908e-05, |
|
"loss": 0.3938, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 7.930324738666627e-05, |
|
"loss": 0.3892, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 7.916162994137056e-05, |
|
"loss": 0.3819, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.902005617218454e-05, |
|
"loss": 0.3874, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.887852637583926e-05, |
|
"loss": 0.3873, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 7.873704084897365e-05, |
|
"loss": 0.3865, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 7.85955998881337e-05, |
|
"loss": 0.3914, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 7.845420378977223e-05, |
|
"loss": 0.3866, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 7.831285285024781e-05, |
|
"loss": 0.3829, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.817154736582452e-05, |
|
"loss": 0.383, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.803028763267108e-05, |
|
"loss": 0.3942, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.788907394686033e-05, |
|
"loss": 0.3779, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 7.774790660436858e-05, |
|
"loss": 0.3832, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"eval_loss": 0.4780256748199463, |
|
"eval_runtime": 28.2756, |
|
"eval_samples_per_second": 7.073, |
|
"eval_steps_per_second": 0.884, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 7.760678590107507e-05, |
|
"loss": 0.3799, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.746571213276125e-05, |
|
"loss": 0.3877, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.732468559511016e-05, |
|
"loss": 0.3894, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.718370658370596e-05, |
|
"loss": 0.3867, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 7.704277539403304e-05, |
|
"loss": 0.3849, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 7.690189232147566e-05, |
|
"loss": 0.3834, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 7.676105766131726e-05, |
|
"loss": 0.3855, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 7.66202717087397e-05, |
|
"loss": 0.3831, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 7.647953475882285e-05, |
|
"loss": 0.3849, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 7.633884710654383e-05, |
|
"loss": 0.3867, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 7.619820904677641e-05, |
|
"loss": 0.3883, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 7.605762087429044e-05, |
|
"loss": 0.3872, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 7.591708288375125e-05, |
|
"loss": 0.3896, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 7.577659536971895e-05, |
|
"loss": 0.3916, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 7.56361586266478e-05, |
|
"loss": 0.3898, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 7.549577294888581e-05, |
|
"loss": 0.3884, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 7.535543863067377e-05, |
|
"loss": 0.391, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 7.521515596614496e-05, |
|
"loss": 0.3867, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 7.507492524932433e-05, |
|
"loss": 0.3866, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.493474677412794e-05, |
|
"loss": 0.3903, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.479462083436241e-05, |
|
"loss": 0.3917, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 7.465454772372425e-05, |
|
"loss": 0.3833, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 7.451452773579915e-05, |
|
"loss": 0.3872, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 7.437456116406152e-05, |
|
"loss": 0.385, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 7.423464830187386e-05, |
|
"loss": 0.3861, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 7.409478944248602e-05, |
|
"loss": 0.3788, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 7.39549848790347e-05, |
|
"loss": 0.3886, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 7.381523490454278e-05, |
|
"loss": 0.3845, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 7.367553981191875e-05, |
|
"loss": 0.3844, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 7.353589989395604e-05, |
|
"loss": 0.3828, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 7.339631544333249e-05, |
|
"loss": 0.3886, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 7.325678675260961e-05, |
|
"loss": 0.3829, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 7.31173141142321e-05, |
|
"loss": 0.3859, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 7.297789782052717e-05, |
|
"loss": 0.3829, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 7.283853816370386e-05, |
|
"loss": 0.387, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 7.269923543585258e-05, |
|
"loss": 0.38, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 7.255998992894443e-05, |
|
"loss": 0.3875, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 7.242080193483051e-05, |
|
"loss": 0.3846, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 7.228167174524148e-05, |
|
"loss": 0.3852, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 7.214259965178674e-05, |
|
"loss": 0.3833, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 7.200358594595392e-05, |
|
"loss": 0.3797, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 7.186463091910838e-05, |
|
"loss": 0.3818, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 7.172573486249241e-05, |
|
"loss": 0.3867, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 7.15868980672247e-05, |
|
"loss": 0.3807, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 7.14481208242998e-05, |
|
"loss": 0.384, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 7.130940342458732e-05, |
|
"loss": 0.3863, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 7.117074615883162e-05, |
|
"loss": 0.3842, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 7.10321493176508e-05, |
|
"loss": 0.385, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 7.089361319153649e-05, |
|
"loss": 0.3844, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 7.075513807085299e-05, |
|
"loss": 0.3841, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 7.061672424583677e-05, |
|
"loss": 0.3828, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 7.047837200659579e-05, |
|
"loss": 0.3814, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 7.034008164310892e-05, |
|
"loss": 0.3807, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 7.020185344522543e-05, |
|
"loss": 0.3787, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 7.006368770266421e-05, |
|
"loss": 0.3809, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 6.992558470501325e-05, |
|
"loss": 0.3784, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 6.978754474172909e-05, |
|
"loss": 0.3808, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 6.964956810213605e-05, |
|
"loss": 0.3788, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 6.95116550754259e-05, |
|
"loss": 0.3927, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 6.937380595065685e-05, |
|
"loss": 0.3848, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 6.923602101675337e-05, |
|
"loss": 0.3848, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 0.3805, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 6.89606448765673e-05, |
|
"loss": 0.3786, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 6.88230542474584e-05, |
|
"loss": 0.3823, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 6.868552896356117e-05, |
|
"loss": 0.3795, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 6.854806931312128e-05, |
|
"loss": 0.3875, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 6.841067558424677e-05, |
|
"loss": 0.3849, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 6.827334806490757e-05, |
|
"loss": 0.3856, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 6.813608704293484e-05, |
|
"loss": 0.3851, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 6.799889280602031e-05, |
|
"loss": 0.379, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 6.786176564171582e-05, |
|
"loss": 0.3843, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 6.77247058374325e-05, |
|
"loss": 0.3855, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 6.758771368044042e-05, |
|
"loss": 0.3795, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 6.74507894578678e-05, |
|
"loss": 0.3809, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 6.731393345670051e-05, |
|
"loss": 0.3858, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 6.717714596378137e-05, |
|
"loss": 0.3788, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 6.704042726580972e-05, |
|
"loss": 0.3801, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 6.69037776493406e-05, |
|
"loss": 0.3835, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 6.67671974007843e-05, |
|
"loss": 0.3817, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 6.663068680640574e-05, |
|
"loss": 0.3846, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 6.649424615232382e-05, |
|
"loss": 0.3826, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 6.635787572451083e-05, |
|
"loss": 0.379, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 6.622157580879195e-05, |
|
"loss": 0.3808, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 6.608534669084449e-05, |
|
"loss": 0.3782, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 6.59491886561974e-05, |
|
"loss": 0.386, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 6.58131019902306e-05, |
|
"loss": 0.3801, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 6.567708697817453e-05, |
|
"loss": 0.3838, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 6.554114390510935e-05, |
|
"loss": 0.379, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 6.540527305596449e-05, |
|
"loss": 0.3781, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 6.526947471551798e-05, |
|
"loss": 0.3812, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 6.513374916839587e-05, |
|
"loss": 0.3787, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 6.499809669907169e-05, |
|
"loss": 0.3865, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 6.486251759186572e-05, |
|
"loss": 0.3822, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 6.472701213094456e-05, |
|
"loss": 0.3791, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 6.45915806003204e-05, |
|
"loss": 0.3839, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 6.445622328385047e-05, |
|
"loss": 0.3788, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 6.432094046523646e-05, |
|
"loss": 0.3763, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 6.418573242802397e-05, |
|
"loss": 0.3868, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 6.405059945560179e-05, |
|
"loss": 0.3823, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 6.391554183120138e-05, |
|
"loss": 0.3752, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 6.378055983789637e-05, |
|
"loss": 0.3808, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 6.364565375860174e-05, |
|
"loss": 0.3863, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 6.351082387607345e-05, |
|
"loss": 0.3849, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 6.337607047290774e-05, |
|
"loss": 0.3801, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 6.324139383154049e-05, |
|
"loss": 0.3777, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 6.310679423424679e-05, |
|
"loss": 0.3814, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 6.297227196314018e-05, |
|
"loss": 0.3865, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 6.283782730017218e-05, |
|
"loss": 0.3758, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 6.270346052713154e-05, |
|
"loss": 0.3815, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 6.25691719256439e-05, |
|
"loss": 0.3797, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 6.243496177717099e-05, |
|
"loss": 0.3763, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 6.230083036301004e-05, |
|
"loss": 0.3775, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 6.216677796429342e-05, |
|
"loss": 0.3753, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 6.20328048619877e-05, |
|
"loss": 0.3835, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 6.189891133689342e-05, |
|
"loss": 0.3751, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 6.176509766964421e-05, |
|
"loss": 0.3806, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 6.163136414070635e-05, |
|
"loss": 0.3853, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 6.149771103037821e-05, |
|
"loss": 0.3844, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 6.136413861878953e-05, |
|
"loss": 0.3819, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 6.123064718590099e-05, |
|
"loss": 0.3771, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 6.10972370115034e-05, |
|
"loss": 0.3866, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 6.096390837521746e-05, |
|
"loss": 0.3799, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 6.0830661556492806e-05, |
|
"loss": 0.3767, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 6.069749683460765e-05, |
|
"loss": 0.3779, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 6.0564414488668165e-05, |
|
"loss": 0.3843, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 6.043141479760778e-05, |
|
"loss": 0.3739, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 6.0298498040186794e-05, |
|
"loss": 0.3831, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 6.0165664494991594e-05, |
|
"loss": 0.3801, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 6.0032914440434186e-05, |
|
"loss": 0.3844, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 5.9900248154751616e-05, |
|
"loss": 0.3854, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.9767665916005286e-05, |
|
"loss": 0.382, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.9635168002080564e-05, |
|
"loss": 0.3805, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 5.950275469068588e-05, |
|
"loss": 0.3775, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 5.937042625935252e-05, |
|
"loss": 0.379, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 5.923818298543379e-05, |
|
"loss": 0.3786, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 5.9106025146104525e-05, |
|
"loss": 0.3797, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 5.897395301836051e-05, |
|
"loss": 0.3748, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 5.8841966879017816e-05, |
|
"loss": 0.3682, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 0.4814474880695343, |
|
"eval_runtime": 28.2712, |
|
"eval_samples_per_second": 7.074, |
|
"eval_steps_per_second": 0.884, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"step": 1378, |
|
"total_flos": 5.88729111501865e+16, |
|
"train_loss": 0.4462547982638737, |
|
"train_runtime": 110414.5531, |
|
"train_samples_per_second": 2.524, |
|
"train_steps_per_second": 0.02 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2170, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 5.88729111501865e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|