|
{ |
|
"best_metric": 0.8241426348686218, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-450", |
|
"epoch": 0.005982033958012768, |
|
"eval_steps": 150, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.329340879558393e-05, |
|
"grad_norm": 1.9388657808303833, |
|
"learning_rate": 5e-06, |
|
"loss": 1.3911, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.329340879558393e-05, |
|
"eval_loss": 1.8459720611572266, |
|
"eval_runtime": 3461.4381, |
|
"eval_samples_per_second": 36.602, |
|
"eval_steps_per_second": 9.151, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.658681759116786e-05, |
|
"grad_norm": 2.8200104236602783, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3589, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.988022638675179e-05, |
|
"grad_norm": 2.7405104637145996, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.3274, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 5.317363518233572e-05, |
|
"grad_norm": 2.6861093044281006, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2136, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 6.646704397791965e-05, |
|
"grad_norm": 2.4807560443878174, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.2634, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 7.976045277350358e-05, |
|
"grad_norm": 3.396902322769165, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1798, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 9.305386156908751e-05, |
|
"grad_norm": 2.9391775131225586, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.2212, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00010634727036467144, |
|
"grad_norm": 2.992940902709961, |
|
"learning_rate": 4e-05, |
|
"loss": 1.1953, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00011964067916025536, |
|
"grad_norm": 3.2214736938476562, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.1323, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0001329340879558393, |
|
"grad_norm": 3.6011428833007812, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1466, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00014622749675142322, |
|
"grad_norm": 3.9849705696105957, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.0944, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00015952090554700716, |
|
"grad_norm": 3.5957984924316406, |
|
"learning_rate": 6e-05, |
|
"loss": 1.171, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00017281431434259108, |
|
"grad_norm": 2.5957956314086914, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 1.074, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00018610772313817502, |
|
"grad_norm": 3.4556539058685303, |
|
"learning_rate": 7e-05, |
|
"loss": 1.0507, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00019940113193375893, |
|
"grad_norm": 3.1671648025512695, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.0411, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00021269454072934287, |
|
"grad_norm": 2.902188539505005, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0133, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00022598794952492681, |
|
"grad_norm": 2.787954568862915, |
|
"learning_rate": 8.5e-05, |
|
"loss": 1.0352, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.00023928135832051073, |
|
"grad_norm": 2.525041103363037, |
|
"learning_rate": 9e-05, |
|
"loss": 0.9911, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.00025257476711609464, |
|
"grad_norm": 2.6477138996124268, |
|
"learning_rate": 9.5e-05, |
|
"loss": 0.9524, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0002658681759116786, |
|
"grad_norm": 2.82763409614563, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0342, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0002791615847072625, |
|
"grad_norm": 3.421452760696411, |
|
"learning_rate": 9.999866555428618e-05, |
|
"loss": 0.9806, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.00029245499350284644, |
|
"grad_norm": 2.983229398727417, |
|
"learning_rate": 9.999466228837451e-05, |
|
"loss": 0.8953, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.00030574840229843035, |
|
"grad_norm": 3.4214861392974854, |
|
"learning_rate": 9.998799041595064e-05, |
|
"loss": 0.9724, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0003190418110940143, |
|
"grad_norm": 3.5156090259552, |
|
"learning_rate": 9.997865029314463e-05, |
|
"loss": 0.9613, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00033233521988959824, |
|
"grad_norm": 3.8509676456451416, |
|
"learning_rate": 9.996664241851197e-05, |
|
"loss": 0.913, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00034562862868518215, |
|
"grad_norm": 3.1751909255981445, |
|
"learning_rate": 9.995196743300692e-05, |
|
"loss": 1.1327, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0003589220374807661, |
|
"grad_norm": 3.8788204193115234, |
|
"learning_rate": 9.993462611994832e-05, |
|
"loss": 0.7996, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00037221544627635004, |
|
"grad_norm": 3.27899432182312, |
|
"learning_rate": 9.991461940497786e-05, |
|
"loss": 0.9915, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.00038550885507193395, |
|
"grad_norm": 3.78037691116333, |
|
"learning_rate": 9.989194835601048e-05, |
|
"loss": 1.0367, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00039880226386751786, |
|
"grad_norm": 4.236947536468506, |
|
"learning_rate": 9.986661418317759e-05, |
|
"loss": 0.9108, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00041209567266310183, |
|
"grad_norm": 3.913956642150879, |
|
"learning_rate": 9.983861823876231e-05, |
|
"loss": 1.0729, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.00042538908145868575, |
|
"grad_norm": 4.501612186431885, |
|
"learning_rate": 9.980796201712734e-05, |
|
"loss": 1.0856, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.00043868249025426966, |
|
"grad_norm": 4.474135875701904, |
|
"learning_rate": 9.977464715463524e-05, |
|
"loss": 0.897, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00045197589904985363, |
|
"grad_norm": 5.405597686767578, |
|
"learning_rate": 9.973867542956104e-05, |
|
"loss": 1.1942, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.00046526930784543754, |
|
"grad_norm": 3.8748316764831543, |
|
"learning_rate": 9.97000487619973e-05, |
|
"loss": 1.1122, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.00047856271664102146, |
|
"grad_norm": 3.826747417449951, |
|
"learning_rate": 9.965876921375165e-05, |
|
"loss": 1.097, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0004918561254366054, |
|
"grad_norm": 3.8462743759155273, |
|
"learning_rate": 9.961483898823678e-05, |
|
"loss": 0.8577, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0005051495342321893, |
|
"grad_norm": 4.05244255065918, |
|
"learning_rate": 9.956826043035268e-05, |
|
"loss": 0.9462, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0005184429430277732, |
|
"grad_norm": 4.053645610809326, |
|
"learning_rate": 9.951903602636166e-05, |
|
"loss": 0.9335, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0005317363518233572, |
|
"grad_norm": 4.824429988861084, |
|
"learning_rate": 9.946716840375551e-05, |
|
"loss": 1.0454, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0005450297606189411, |
|
"grad_norm": 5.300111770629883, |
|
"learning_rate": 9.94126603311153e-05, |
|
"loss": 0.9345, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.000558323169414525, |
|
"grad_norm": 4.619444847106934, |
|
"learning_rate": 9.935551471796358e-05, |
|
"loss": 1.0301, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.000571616578210109, |
|
"grad_norm": 4.485474586486816, |
|
"learning_rate": 9.92957346146091e-05, |
|
"loss": 0.9127, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0005849099870056929, |
|
"grad_norm": 5.240790843963623, |
|
"learning_rate": 9.923332321198395e-05, |
|
"loss": 1.1571, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0005982033958012768, |
|
"grad_norm": 5.170898914337158, |
|
"learning_rate": 9.916828384147331e-05, |
|
"loss": 1.0133, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0006114968045968607, |
|
"grad_norm": 6.1428446769714355, |
|
"learning_rate": 9.910061997473752e-05, |
|
"loss": 1.0003, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0006247902133924447, |
|
"grad_norm": 5.351384162902832, |
|
"learning_rate": 9.903033522352687e-05, |
|
"loss": 0.999, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0006380836221880286, |
|
"grad_norm": 5.867308616638184, |
|
"learning_rate": 9.895743333948874e-05, |
|
"loss": 1.0887, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0006513770309836126, |
|
"grad_norm": 7.773362159729004, |
|
"learning_rate": 9.888191821396744e-05, |
|
"loss": 1.1315, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0006646704397791965, |
|
"grad_norm": 9.756021499633789, |
|
"learning_rate": 9.880379387779637e-05, |
|
"loss": 1.2055, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0006779638485747804, |
|
"grad_norm": 3.411668062210083, |
|
"learning_rate": 9.872306450108292e-05, |
|
"loss": 1.4152, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0006912572573703643, |
|
"grad_norm": 2.6563198566436768, |
|
"learning_rate": 9.863973439298597e-05, |
|
"loss": 1.1962, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0007045506661659482, |
|
"grad_norm": 2.5267629623413086, |
|
"learning_rate": 9.855380800148572e-05, |
|
"loss": 1.1744, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0007178440749615322, |
|
"grad_norm": 3.2284536361694336, |
|
"learning_rate": 9.846528991314639e-05, |
|
"loss": 0.9512, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0007311374837571162, |
|
"grad_norm": 2.0135741233825684, |
|
"learning_rate": 9.837418485287127e-05, |
|
"loss": 1.0179, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0007444308925527001, |
|
"grad_norm": 2.1273906230926514, |
|
"learning_rate": 9.828049768365068e-05, |
|
"loss": 0.9856, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.000757724301348284, |
|
"grad_norm": 2.432741165161133, |
|
"learning_rate": 9.818423340630228e-05, |
|
"loss": 1.0059, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0007710177101438679, |
|
"grad_norm": 2.2927942276000977, |
|
"learning_rate": 9.808539715920414e-05, |
|
"loss": 1.2061, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0007843111189394518, |
|
"grad_norm": 2.312389850616455, |
|
"learning_rate": 9.798399421802056e-05, |
|
"loss": 0.8977, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0007976045277350357, |
|
"grad_norm": 2.4386355876922607, |
|
"learning_rate": 9.78800299954203e-05, |
|
"loss": 0.8272, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0008108979365306198, |
|
"grad_norm": 1.9699569940567017, |
|
"learning_rate": 9.777351004078783e-05, |
|
"loss": 1.087, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0008241913453262037, |
|
"grad_norm": 2.6284430027008057, |
|
"learning_rate": 9.766444003992703e-05, |
|
"loss": 1.1052, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0008374847541217876, |
|
"grad_norm": 2.237579822540283, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.9417, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0008507781629173715, |
|
"grad_norm": 2.1193315982818604, |
|
"learning_rate": 9.743867332300478e-05, |
|
"loss": 0.9962, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0008640715717129554, |
|
"grad_norm": 2.354325294494629, |
|
"learning_rate": 9.732198865788047e-05, |
|
"loss": 1.0707, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0008773649805085393, |
|
"grad_norm": 2.586552858352661, |
|
"learning_rate": 9.72027780477588e-05, |
|
"loss": 0.9973, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0008906583893041232, |
|
"grad_norm": 2.397660255432129, |
|
"learning_rate": 9.708104785584323e-05, |
|
"loss": 0.9432, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0009039517980997073, |
|
"grad_norm": 2.3507635593414307, |
|
"learning_rate": 9.695680457982713e-05, |
|
"loss": 1.0085, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0009172452068952912, |
|
"grad_norm": 2.0967140197753906, |
|
"learning_rate": 9.683005485154677e-05, |
|
"loss": 0.9675, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0009305386156908751, |
|
"grad_norm": 2.5464537143707275, |
|
"learning_rate": 9.67008054366274e-05, |
|
"loss": 0.871, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.000943832024486459, |
|
"grad_norm": 2.32565975189209, |
|
"learning_rate": 9.656906323412217e-05, |
|
"loss": 0.9763, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0009571254332820429, |
|
"grad_norm": 2.7051308155059814, |
|
"learning_rate": 9.643483527614372e-05, |
|
"loss": 1.1479, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0009704188420776268, |
|
"grad_norm": 2.812052011489868, |
|
"learning_rate": 9.629812872748901e-05, |
|
"loss": 0.8656, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0009837122508732107, |
|
"grad_norm": 2.9575998783111572, |
|
"learning_rate": 9.615895088525677e-05, |
|
"loss": 0.9026, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0009970056596687947, |
|
"grad_norm": 2.4681830406188965, |
|
"learning_rate": 9.601730917845797e-05, |
|
"loss": 1.0127, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0010102990684643786, |
|
"grad_norm": 2.636371374130249, |
|
"learning_rate": 9.587321116761938e-05, |
|
"loss": 0.9537, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0010235924772599625, |
|
"grad_norm": 4.0426554679870605, |
|
"learning_rate": 9.57266645443799e-05, |
|
"loss": 0.9712, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0010368858860555464, |
|
"grad_norm": 2.898866653442383, |
|
"learning_rate": 9.557767713108009e-05, |
|
"loss": 0.8165, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0010501792948511305, |
|
"grad_norm": 3.4302549362182617, |
|
"learning_rate": 9.542625688034449e-05, |
|
"loss": 0.9472, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0010634727036467144, |
|
"grad_norm": 2.756509304046631, |
|
"learning_rate": 9.527241187465734e-05, |
|
"loss": 0.7932, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0010767661124422984, |
|
"grad_norm": 2.6443912982940674, |
|
"learning_rate": 9.511615032593096e-05, |
|
"loss": 0.8982, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0010900595212378823, |
|
"grad_norm": 2.9568185806274414, |
|
"learning_rate": 9.49574805750675e-05, |
|
"loss": 0.8393, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0011033529300334662, |
|
"grad_norm": 3.2217254638671875, |
|
"learning_rate": 9.479641109151373e-05, |
|
"loss": 1.0166, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.00111664633882905, |
|
"grad_norm": 2.9364144802093506, |
|
"learning_rate": 9.463295047280891e-05, |
|
"loss": 0.9075, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.001129939747624634, |
|
"grad_norm": 3.6510021686553955, |
|
"learning_rate": 9.446710744412595e-05, |
|
"loss": 0.9151, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.001143233156420218, |
|
"grad_norm": 3.259711742401123, |
|
"learning_rate": 9.429889085780557e-05, |
|
"loss": 0.7931, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0011565265652158018, |
|
"grad_norm": 3.346482276916504, |
|
"learning_rate": 9.41283096928839e-05, |
|
"loss": 0.8724, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0011698199740113858, |
|
"grad_norm": 3.270686626434326, |
|
"learning_rate": 9.395537305461311e-05, |
|
"loss": 0.7726, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0011831133828069697, |
|
"grad_norm": 3.747311592102051, |
|
"learning_rate": 9.378009017397542e-05, |
|
"loss": 0.8455, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0011964067916025536, |
|
"grad_norm": 3.821293592453003, |
|
"learning_rate": 9.360247040719039e-05, |
|
"loss": 0.757, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0012097002003981375, |
|
"grad_norm": 4.054718017578125, |
|
"learning_rate": 9.342252323521545e-05, |
|
"loss": 0.9279, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0012229936091937214, |
|
"grad_norm": 3.5439743995666504, |
|
"learning_rate": 9.324025826323994e-05, |
|
"loss": 0.7374, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0012362870179893056, |
|
"grad_norm": 4.841712951660156, |
|
"learning_rate": 9.305568522017227e-05, |
|
"loss": 0.7237, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0012495804267848895, |
|
"grad_norm": 5.552839279174805, |
|
"learning_rate": 9.286881395812066e-05, |
|
"loss": 0.8263, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0012628738355804734, |
|
"grad_norm": 5.980641841888428, |
|
"learning_rate": 9.267965445186733e-05, |
|
"loss": 1.141, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0012761672443760573, |
|
"grad_norm": 5.712211608886719, |
|
"learning_rate": 9.248821679833596e-05, |
|
"loss": 1.0009, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0012894606531716412, |
|
"grad_norm": 4.681507110595703, |
|
"learning_rate": 9.229451121605279e-05, |
|
"loss": 0.8064, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0013027540619672251, |
|
"grad_norm": 5.275393009185791, |
|
"learning_rate": 9.209854804460121e-05, |
|
"loss": 0.9145, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.001316047470762809, |
|
"grad_norm": 5.1414361000061035, |
|
"learning_rate": 9.190033774406977e-05, |
|
"loss": 0.92, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.001329340879558393, |
|
"grad_norm": 6.979901313781738, |
|
"learning_rate": 9.16998908944939e-05, |
|
"loss": 0.9631, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0013426342883539769, |
|
"grad_norm": 3.0721938610076904, |
|
"learning_rate": 9.149721819529119e-05, |
|
"loss": 1.3745, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0013559276971495608, |
|
"grad_norm": 2.620919704437256, |
|
"learning_rate": 9.129233046469022e-05, |
|
"loss": 1.2247, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0013692211059451447, |
|
"grad_norm": 2.184368371963501, |
|
"learning_rate": 9.108523863915314e-05, |
|
"loss": 1.0819, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0013825145147407286, |
|
"grad_norm": 1.9781068563461304, |
|
"learning_rate": 9.087595377279192e-05, |
|
"loss": 1.1311, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.0013958079235363125, |
|
"grad_norm": 1.648494005203247, |
|
"learning_rate": 9.066448703677828e-05, |
|
"loss": 1.0057, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0014091013323318964, |
|
"grad_norm": 1.681408405303955, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.9557, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0014223947411274806, |
|
"grad_norm": 1.9565364122390747, |
|
"learning_rate": 9.023505322219536e-05, |
|
"loss": 0.9797, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0014356881499230645, |
|
"grad_norm": 1.8517160415649414, |
|
"learning_rate": 9.001710906587064e-05, |
|
"loss": 1.1434, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0014489815587186484, |
|
"grad_norm": 2.314161539077759, |
|
"learning_rate": 8.9797028883159e-05, |
|
"loss": 0.9643, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0014622749675142323, |
|
"grad_norm": 2.426609516143799, |
|
"learning_rate": 8.957482442146272e-05, |
|
"loss": 0.9958, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0014755683763098162, |
|
"grad_norm": 2.338745355606079, |
|
"learning_rate": 8.935050754157344e-05, |
|
"loss": 0.8958, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0014888617851054001, |
|
"grad_norm": 2.2527472972869873, |
|
"learning_rate": 8.912409021703913e-05, |
|
"loss": 0.8262, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.001502155193900984, |
|
"grad_norm": 2.3767216205596924, |
|
"learning_rate": 8.889558453352492e-05, |
|
"loss": 0.9135, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.001515448602696568, |
|
"grad_norm": 2.25586199760437, |
|
"learning_rate": 8.866500268816803e-05, |
|
"loss": 0.9426, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0015287420114921519, |
|
"grad_norm": 2.027083158493042, |
|
"learning_rate": 8.84323569889266e-05, |
|
"loss": 1.0912, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0015420354202877358, |
|
"grad_norm": 2.3341987133026123, |
|
"learning_rate": 8.819765985392296e-05, |
|
"loss": 0.964, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0015553288290833197, |
|
"grad_norm": 2.9502508640289307, |
|
"learning_rate": 8.79609238107805e-05, |
|
"loss": 0.9789, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.0015686222378789036, |
|
"grad_norm": 2.1710007190704346, |
|
"learning_rate": 8.772216149595513e-05, |
|
"loss": 0.8787, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0015819156466744875, |
|
"grad_norm": 2.401892900466919, |
|
"learning_rate": 8.748138565406081e-05, |
|
"loss": 0.9684, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0015952090554700715, |
|
"grad_norm": 2.7512049674987793, |
|
"learning_rate": 8.72386091371891e-05, |
|
"loss": 0.8961, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0016085024642656556, |
|
"grad_norm": 2.9185006618499756, |
|
"learning_rate": 8.699384490422331e-05, |
|
"loss": 0.8282, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0016217958730612395, |
|
"grad_norm": 2.936816930770874, |
|
"learning_rate": 8.674710602014671e-05, |
|
"loss": 0.8772, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.0016350892818568234, |
|
"grad_norm": 3.143145799636841, |
|
"learning_rate": 8.649840565534513e-05, |
|
"loss": 0.9349, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0016483826906524073, |
|
"grad_norm": 2.681953191757202, |
|
"learning_rate": 8.624775708490402e-05, |
|
"loss": 0.9328, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.0016616760994479912, |
|
"grad_norm": 3.1768240928649902, |
|
"learning_rate": 8.59951736878998e-05, |
|
"loss": 0.8815, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0016749695082435752, |
|
"grad_norm": 3.0723233222961426, |
|
"learning_rate": 8.574066894668573e-05, |
|
"loss": 0.9168, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.001688262917039159, |
|
"grad_norm": 3.1787002086639404, |
|
"learning_rate": 8.548425644617224e-05, |
|
"loss": 0.9053, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.001701556325834743, |
|
"grad_norm": 3.2121903896331787, |
|
"learning_rate": 8.522594987310184e-05, |
|
"loss": 0.8593, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.001714849734630327, |
|
"grad_norm": 2.9943299293518066, |
|
"learning_rate": 8.49657630153185e-05, |
|
"loss": 0.9997, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0017281431434259108, |
|
"grad_norm": 3.535433292388916, |
|
"learning_rate": 8.47037097610317e-05, |
|
"loss": 0.9114, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0017414365522214947, |
|
"grad_norm": 3.203751564025879, |
|
"learning_rate": 8.443980409807512e-05, |
|
"loss": 0.9689, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0017547299610170786, |
|
"grad_norm": 3.2908623218536377, |
|
"learning_rate": 8.417406011315998e-05, |
|
"loss": 1.0035, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0017680233698126626, |
|
"grad_norm": 2.752976179122925, |
|
"learning_rate": 8.390649199112315e-05, |
|
"loss": 0.9917, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0017813167786082465, |
|
"grad_norm": 3.3330466747283936, |
|
"learning_rate": 8.363711401417e-05, |
|
"loss": 1.0635, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.0017946101874038306, |
|
"grad_norm": 3.2648167610168457, |
|
"learning_rate": 8.336594056111197e-05, |
|
"loss": 0.8795, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0018079035961994145, |
|
"grad_norm": 3.277704954147339, |
|
"learning_rate": 8.309298610659916e-05, |
|
"loss": 0.8441, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.0018211970049949984, |
|
"grad_norm": 3.603541374206543, |
|
"learning_rate": 8.281826522034764e-05, |
|
"loss": 0.9423, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0018344904137905823, |
|
"grad_norm": 2.947814464569092, |
|
"learning_rate": 8.254179256636179e-05, |
|
"loss": 0.8583, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.0018477838225861663, |
|
"grad_norm": 3.5735108852386475, |
|
"learning_rate": 8.226358290215151e-05, |
|
"loss": 1.0053, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.0018610772313817502, |
|
"grad_norm": 4.395263195037842, |
|
"learning_rate": 8.198365107794457e-05, |
|
"loss": 0.908, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.001874370640177334, |
|
"grad_norm": 4.384109973907471, |
|
"learning_rate": 8.17020120358939e-05, |
|
"loss": 0.9973, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.001887664048972918, |
|
"grad_norm": 3.42045521736145, |
|
"learning_rate": 8.141868080927996e-05, |
|
"loss": 0.8164, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.001900957457768502, |
|
"grad_norm": 3.931617259979248, |
|
"learning_rate": 8.113367252170844e-05, |
|
"loss": 0.9282, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0019142508665640858, |
|
"grad_norm": 4.341536045074463, |
|
"learning_rate": 8.084700238630283e-05, |
|
"loss": 0.9588, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.0019275442753596697, |
|
"grad_norm": 4.990645885467529, |
|
"learning_rate": 8.055868570489247e-05, |
|
"loss": 0.9038, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0019408376841552537, |
|
"grad_norm": 5.111573219299316, |
|
"learning_rate": 8.026873786719573e-05, |
|
"loss": 0.9121, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.0019541310929508376, |
|
"grad_norm": 4.322947978973389, |
|
"learning_rate": 7.997717434999861e-05, |
|
"loss": 0.8981, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.0019674245017464215, |
|
"grad_norm": 4.9193010330200195, |
|
"learning_rate": 7.968401071632855e-05, |
|
"loss": 0.9212, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.0019807179105420054, |
|
"grad_norm": 7.261848449707031, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 1.1311, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.0019940113193375893, |
|
"grad_norm": 9.099236488342285, |
|
"learning_rate": 7.909294577789766e-05, |
|
"loss": 0.8936, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0019940113193375893, |
|
"eval_loss": 0.9891857504844666, |
|
"eval_runtime": 3478.2034, |
|
"eval_samples_per_second": 36.426, |
|
"eval_steps_per_second": 9.106, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0020073047281331732, |
|
"grad_norm": 2.0536725521087646, |
|
"learning_rate": 7.879507602289979e-05, |
|
"loss": 1.2124, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.002020598136928757, |
|
"grad_norm": 2.2409486770629883, |
|
"learning_rate": 7.849566924927082e-05, |
|
"loss": 0.9982, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.002033891545724341, |
|
"grad_norm": 1.8208754062652588, |
|
"learning_rate": 7.819474143869414e-05, |
|
"loss": 1.0108, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.002047184954519925, |
|
"grad_norm": 1.8151131868362427, |
|
"learning_rate": 7.789230865404287e-05, |
|
"loss": 0.9152, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.002060478363315509, |
|
"grad_norm": 1.7584021091461182, |
|
"learning_rate": 7.75883870385223e-05, |
|
"loss": 0.9911, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.002073771772111093, |
|
"grad_norm": 1.859797477722168, |
|
"learning_rate": 7.728299281480833e-05, |
|
"loss": 0.8977, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.002087065180906677, |
|
"grad_norm": 1.963524580001831, |
|
"learning_rate": 7.697614228418148e-05, |
|
"loss": 0.9215, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.002100358589702261, |
|
"grad_norm": 1.8495042324066162, |
|
"learning_rate": 7.666785182565677e-05, |
|
"loss": 0.9008, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.002113651998497845, |
|
"grad_norm": 1.8056210279464722, |
|
"learning_rate": 7.635813789510941e-05, |
|
"loss": 1.0834, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.002126945407293429, |
|
"grad_norm": 2.264211893081665, |
|
"learning_rate": 7.604701702439651e-05, |
|
"loss": 1.0834, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.002140238816089013, |
|
"grad_norm": 2.530268669128418, |
|
"learning_rate": 7.573450582047457e-05, |
|
"loss": 1.0092, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.0021535322248845967, |
|
"grad_norm": 2.144922971725464, |
|
"learning_rate": 7.542062096451305e-05, |
|
"loss": 0.8963, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.0021668256336801806, |
|
"grad_norm": 2.751955986022949, |
|
"learning_rate": 7.510537921100398e-05, |
|
"loss": 0.9765, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.0021801190424757646, |
|
"grad_norm": 2.1858596801757812, |
|
"learning_rate": 7.47887973868676e-05, |
|
"loss": 1.0205, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.0021934124512713485, |
|
"grad_norm": 2.055968999862671, |
|
"learning_rate": 7.447089239055428e-05, |
|
"loss": 0.8658, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0022067058600669324, |
|
"grad_norm": 2.7165462970733643, |
|
"learning_rate": 7.41516811911424e-05, |
|
"loss": 0.8954, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.0022199992688625163, |
|
"grad_norm": 2.2613511085510254, |
|
"learning_rate": 7.383118082743262e-05, |
|
"loss": 1.0412, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.0022332926776581, |
|
"grad_norm": 2.174492359161377, |
|
"learning_rate": 7.350940840703842e-05, |
|
"loss": 0.9415, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.002246586086453684, |
|
"grad_norm": 2.3029043674468994, |
|
"learning_rate": 7.318638110547288e-05, |
|
"loss": 1.0003, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.002259879495249268, |
|
"grad_norm": 2.20206356048584, |
|
"learning_rate": 7.286211616523193e-05, |
|
"loss": 1.0255, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.002273172904044852, |
|
"grad_norm": 2.3058087825775146, |
|
"learning_rate": 7.253663089487395e-05, |
|
"loss": 0.9273, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.002286466312840436, |
|
"grad_norm": 2.4932408332824707, |
|
"learning_rate": 7.220994266809591e-05, |
|
"loss": 0.9712, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.0022997597216360198, |
|
"grad_norm": 2.348066568374634, |
|
"learning_rate": 7.188206892280594e-05, |
|
"loss": 0.8345, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.0023130531304316037, |
|
"grad_norm": 2.508608341217041, |
|
"learning_rate": 7.155302716019263e-05, |
|
"loss": 1.069, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.0023263465392271876, |
|
"grad_norm": 2.3190245628356934, |
|
"learning_rate": 7.122283494379076e-05, |
|
"loss": 0.7667, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.0023396399480227715, |
|
"grad_norm": 2.9227256774902344, |
|
"learning_rate": 7.089150989854385e-05, |
|
"loss": 0.8513, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.0023529333568183554, |
|
"grad_norm": 2.6290760040283203, |
|
"learning_rate": 7.055906970986336e-05, |
|
"loss": 0.853, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.0023662267656139394, |
|
"grad_norm": 2.2298004627227783, |
|
"learning_rate": 7.022553212268469e-05, |
|
"loss": 0.8112, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.0023795201744095233, |
|
"grad_norm": 2.7853963375091553, |
|
"learning_rate": 6.989091494051998e-05, |
|
"loss": 0.8567, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.002392813583205107, |
|
"grad_norm": 2.7704803943634033, |
|
"learning_rate": 6.95552360245078e-05, |
|
"loss": 0.7658, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.002406106992000691, |
|
"grad_norm": 3.0688202381134033, |
|
"learning_rate": 6.92185132924598e-05, |
|
"loss": 0.8616, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.002419400400796275, |
|
"grad_norm": 3.337465763092041, |
|
"learning_rate": 6.888076471790424e-05, |
|
"loss": 0.9898, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.002432693809591859, |
|
"grad_norm": 3.242480516433716, |
|
"learning_rate": 6.85420083291266e-05, |
|
"loss": 0.7605, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.002445987218387443, |
|
"grad_norm": 3.2811238765716553, |
|
"learning_rate": 6.820226220820732e-05, |
|
"loss": 0.9702, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.002459280627183027, |
|
"grad_norm": 3.416262149810791, |
|
"learning_rate": 6.786154449005665e-05, |
|
"loss": 0.8603, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.002472574035978611, |
|
"grad_norm": 3.3136789798736572, |
|
"learning_rate": 6.751987336144648e-05, |
|
"loss": 0.9065, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.002485867444774195, |
|
"grad_norm": 4.629530906677246, |
|
"learning_rate": 6.717726706003974e-05, |
|
"loss": 0.8809, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.002499160853569779, |
|
"grad_norm": 3.5643258094787598, |
|
"learning_rate": 6.683374387341687e-05, |
|
"loss": 0.8574, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.002512454262365363, |
|
"grad_norm": 3.7351303100585938, |
|
"learning_rate": 6.648932213809962e-05, |
|
"loss": 0.9614, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.0025257476711609468, |
|
"grad_norm": 4.150784015655518, |
|
"learning_rate": 6.614402023857232e-05, |
|
"loss": 0.9416, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0025390410799565307, |
|
"grad_norm": 3.927623987197876, |
|
"learning_rate": 6.579785660630056e-05, |
|
"loss": 0.7178, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.0025523344887521146, |
|
"grad_norm": 3.512359142303467, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.918, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.0025656278975476985, |
|
"grad_norm": 3.812126874923706, |
|
"learning_rate": 6.510301809838689e-05, |
|
"loss": 0.885, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.0025789213063432824, |
|
"grad_norm": 4.095211029052734, |
|
"learning_rate": 6.475438031171574e-05, |
|
"loss": 0.8032, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.0025922147151388663, |
|
"grad_norm": 4.639618396759033, |
|
"learning_rate": 6.440495496826189e-05, |
|
"loss": 0.8512, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.0026055081239344502, |
|
"grad_norm": 4.100682258605957, |
|
"learning_rate": 6.405476071959143e-05, |
|
"loss": 0.9202, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.002618801532730034, |
|
"grad_norm": 4.494955539703369, |
|
"learning_rate": 6.370381625831292e-05, |
|
"loss": 0.7531, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.002632094941525618, |
|
"grad_norm": 4.195039749145508, |
|
"learning_rate": 6.335214031707965e-05, |
|
"loss": 0.872, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.002645388350321202, |
|
"grad_norm": 4.241918087005615, |
|
"learning_rate": 6.299975166758971e-05, |
|
"loss": 0.9042, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.002658681759116786, |
|
"grad_norm": 7.482029438018799, |
|
"learning_rate": 6.264666911958404e-05, |
|
"loss": 0.968, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.00267197516791237, |
|
"grad_norm": 1.535090684890747, |
|
"learning_rate": 6.229291151984233e-05, |
|
"loss": 1.1693, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.0026852685767079537, |
|
"grad_norm": 2.0758211612701416, |
|
"learning_rate": 6.19384977511771e-05, |
|
"loss": 0.9393, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.0026985619855035376, |
|
"grad_norm": 2.1649084091186523, |
|
"learning_rate": 6.158344673142573e-05, |
|
"loss": 1.0957, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.0027118553942991216, |
|
"grad_norm": 1.799479365348816, |
|
"learning_rate": 6.122777741244067e-05, |
|
"loss": 1.1526, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.0027251488030947055, |
|
"grad_norm": 1.7623484134674072, |
|
"learning_rate": 6.0871508779077856e-05, |
|
"loss": 0.8282, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.0027384422118902894, |
|
"grad_norm": 1.7054765224456787, |
|
"learning_rate": 6.051465984818332e-05, |
|
"loss": 1.0077, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.0027517356206858733, |
|
"grad_norm": 1.9788992404937744, |
|
"learning_rate": 6.015724966757812e-05, |
|
"loss": 0.869, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.0027650290294814572, |
|
"grad_norm": 1.9093753099441528, |
|
"learning_rate": 5.979929731504158e-05, |
|
"loss": 0.9314, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.002778322438277041, |
|
"grad_norm": 1.8340977430343628, |
|
"learning_rate": 5.944082189729301e-05, |
|
"loss": 0.9003, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.002791615847072625, |
|
"grad_norm": 1.8456708192825317, |
|
"learning_rate": 5.908184254897182e-05, |
|
"loss": 1.0621, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.002804909255868209, |
|
"grad_norm": 2.148777723312378, |
|
"learning_rate": 5.872237843161612e-05, |
|
"loss": 1.094, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.002818202664663793, |
|
"grad_norm": 2.2333133220672607, |
|
"learning_rate": 5.8362448732639894e-05, |
|
"loss": 1.0306, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.002831496073459377, |
|
"grad_norm": 2.136981248855591, |
|
"learning_rate": 5.800207266430895e-05, |
|
"loss": 0.9077, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.002844789482254961, |
|
"grad_norm": 1.8241512775421143, |
|
"learning_rate": 5.764126946271526e-05, |
|
"loss": 1.0229, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.002858082891050545, |
|
"grad_norm": 2.608855962753296, |
|
"learning_rate": 5.7280058386750255e-05, |
|
"loss": 0.9708, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.002871376299846129, |
|
"grad_norm": 1.9406535625457764, |
|
"learning_rate": 5.6918458717076815e-05, |
|
"loss": 1.016, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.002884669708641713, |
|
"grad_norm": 2.244218587875366, |
|
"learning_rate": 5.655648975510014e-05, |
|
"loss": 0.9253, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.002897963117437297, |
|
"grad_norm": 2.27140474319458, |
|
"learning_rate": 5.61941708219374e-05, |
|
"loss": 0.81, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.0029112565262328807, |
|
"grad_norm": 2.125356674194336, |
|
"learning_rate": 5.583152125738651e-05, |
|
"loss": 0.8303, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.0029245499350284646, |
|
"grad_norm": 2.606123208999634, |
|
"learning_rate": 5.546856041889373e-05, |
|
"loss": 0.9575, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0029378433438240485, |
|
"grad_norm": 2.3250555992126465, |
|
"learning_rate": 5.510530768052047e-05, |
|
"loss": 0.9056, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.0029511367526196325, |
|
"grad_norm": 2.9340219497680664, |
|
"learning_rate": 5.4741782431909136e-05, |
|
"loss": 0.881, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.0029644301614152164, |
|
"grad_norm": 2.7451937198638916, |
|
"learning_rate": 5.437800407724812e-05, |
|
"loss": 0.8854, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.0029777235702108003, |
|
"grad_norm": 2.617919921875, |
|
"learning_rate": 5.401399203423606e-05, |
|
"loss": 0.921, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.002991016979006384, |
|
"grad_norm": 2.570213556289673, |
|
"learning_rate": 5.364976573304538e-05, |
|
"loss": 0.788, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.003004310387801968, |
|
"grad_norm": 2.5218234062194824, |
|
"learning_rate": 5.328534461528515e-05, |
|
"loss": 0.8243, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.003017603796597552, |
|
"grad_norm": 2.5379672050476074, |
|
"learning_rate": 5.29207481329633e-05, |
|
"loss": 0.9516, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.003030897205393136, |
|
"grad_norm": 2.763002634048462, |
|
"learning_rate": 5.2555995747448364e-05, |
|
"loss": 0.916, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.00304419061418872, |
|
"grad_norm": 2.5635182857513428, |
|
"learning_rate": 5.2191106928430644e-05, |
|
"loss": 0.8179, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.0030574840229843038, |
|
"grad_norm": 2.8950819969177246, |
|
"learning_rate": 5.182610115288295e-05, |
|
"loss": 0.8488, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0030707774317798877, |
|
"grad_norm": 3.1561074256896973, |
|
"learning_rate": 5.1460997904021005e-05, |
|
"loss": 0.8642, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.0030840708405754716, |
|
"grad_norm": 3.150465965270996, |
|
"learning_rate": 5.109581667026341e-05, |
|
"loss": 1.0022, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.0030973642493710555, |
|
"grad_norm": 2.8613460063934326, |
|
"learning_rate": 5.073057694419147e-05, |
|
"loss": 0.9774, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.0031106576581666394, |
|
"grad_norm": 2.627713203430176, |
|
"learning_rate": 5.036529822150865e-05, |
|
"loss": 0.8306, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.0031239510669622233, |
|
"grad_norm": 3.2326531410217285, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9001, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.0031372444757578073, |
|
"grad_norm": 4.3131489753723145, |
|
"learning_rate": 4.963470177849135e-05, |
|
"loss": 1.0435, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.003150537884553391, |
|
"grad_norm": 3.0774354934692383, |
|
"learning_rate": 4.9269423055808544e-05, |
|
"loss": 0.8572, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.003163831293348975, |
|
"grad_norm": 2.716817855834961, |
|
"learning_rate": 4.8904183329736596e-05, |
|
"loss": 0.8809, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.003177124702144559, |
|
"grad_norm": 2.6746835708618164, |
|
"learning_rate": 4.853900209597903e-05, |
|
"loss": 0.7999, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.003190418110940143, |
|
"grad_norm": 3.784773826599121, |
|
"learning_rate": 4.817389884711705e-05, |
|
"loss": 0.9177, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.003203711519735727, |
|
"grad_norm": 3.225341320037842, |
|
"learning_rate": 4.7808893071569374e-05, |
|
"loss": 0.7424, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.003217004928531311, |
|
"grad_norm": 3.6971752643585205, |
|
"learning_rate": 4.744400425255165e-05, |
|
"loss": 0.7437, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.003230298337326895, |
|
"grad_norm": 3.3899476528167725, |
|
"learning_rate": 4.707925186703671e-05, |
|
"loss": 0.5778, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.003243591746122479, |
|
"grad_norm": 3.9439117908477783, |
|
"learning_rate": 4.671465538471486e-05, |
|
"loss": 0.8173, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.003256885154918063, |
|
"grad_norm": 3.429020643234253, |
|
"learning_rate": 4.6350234266954626e-05, |
|
"loss": 0.7432, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.003270178563713647, |
|
"grad_norm": 4.743805885314941, |
|
"learning_rate": 4.598600796576395e-05, |
|
"loss": 0.7756, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.0032834719725092307, |
|
"grad_norm": 4.231058120727539, |
|
"learning_rate": 4.562199592275188e-05, |
|
"loss": 0.7174, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.0032967653813048147, |
|
"grad_norm": 4.012794494628906, |
|
"learning_rate": 4.5258217568090876e-05, |
|
"loss": 0.673, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.0033100587901003986, |
|
"grad_norm": 5.038784503936768, |
|
"learning_rate": 4.4894692319479544e-05, |
|
"loss": 0.786, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.0033233521988959825, |
|
"grad_norm": 6.414555072784424, |
|
"learning_rate": 4.4531439581106295e-05, |
|
"loss": 0.7456, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0033366456076915664, |
|
"grad_norm": 1.2634704113006592, |
|
"learning_rate": 4.4168478742613506e-05, |
|
"loss": 0.9463, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.0033499390164871503, |
|
"grad_norm": 1.8161299228668213, |
|
"learning_rate": 4.38058291780626e-05, |
|
"loss": 0.8844, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.0033632324252827342, |
|
"grad_norm": 2.100287437438965, |
|
"learning_rate": 4.3443510244899864e-05, |
|
"loss": 0.973, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.003376525834078318, |
|
"grad_norm": 1.925026297569275, |
|
"learning_rate": 4.308154128292318e-05, |
|
"loss": 0.8365, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.003389819242873902, |
|
"grad_norm": 1.9287835359573364, |
|
"learning_rate": 4.271994161324977e-05, |
|
"loss": 1.0131, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.003403112651669486, |
|
"grad_norm": 1.6935553550720215, |
|
"learning_rate": 4.235873053728475e-05, |
|
"loss": 0.8375, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.00341640606046507, |
|
"grad_norm": 2.200186252593994, |
|
"learning_rate": 4.199792733569107e-05, |
|
"loss": 0.8687, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.003429699469260654, |
|
"grad_norm": 1.9003078937530518, |
|
"learning_rate": 4.163755126736012e-05, |
|
"loss": 0.8753, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.0034429928780562377, |
|
"grad_norm": 4.403824329376221, |
|
"learning_rate": 4.127762156838389e-05, |
|
"loss": 1.0423, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.0034562862868518216, |
|
"grad_norm": 2.0057454109191895, |
|
"learning_rate": 4.0918157451028185e-05, |
|
"loss": 1.0278, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0034695796956474055, |
|
"grad_norm": 1.85740065574646, |
|
"learning_rate": 4.055917810270698e-05, |
|
"loss": 1.0295, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.0034828731044429895, |
|
"grad_norm": 2.0854363441467285, |
|
"learning_rate": 4.020070268495843e-05, |
|
"loss": 0.883, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.0034961665132385734, |
|
"grad_norm": 1.8586416244506836, |
|
"learning_rate": 3.9842750332421896e-05, |
|
"loss": 0.9127, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.0035094599220341573, |
|
"grad_norm": 2.047469139099121, |
|
"learning_rate": 3.94853401518167e-05, |
|
"loss": 0.9311, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.003522753330829741, |
|
"grad_norm": 2.0155272483825684, |
|
"learning_rate": 3.9128491220922156e-05, |
|
"loss": 0.806, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.003536046739625325, |
|
"grad_norm": 1.9358389377593994, |
|
"learning_rate": 3.877222258755935e-05, |
|
"loss": 0.989, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.003549340148420909, |
|
"grad_norm": 2.470038414001465, |
|
"learning_rate": 3.8416553268574285e-05, |
|
"loss": 0.8901, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.003562633557216493, |
|
"grad_norm": 2.42508864402771, |
|
"learning_rate": 3.80615022488229e-05, |
|
"loss": 0.9417, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.003575926966012077, |
|
"grad_norm": 2.4037420749664307, |
|
"learning_rate": 3.770708848015768e-05, |
|
"loss": 0.8843, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.003589220374807661, |
|
"grad_norm": 2.5379691123962402, |
|
"learning_rate": 3.735333088041596e-05, |
|
"loss": 0.9576, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.003602513783603245, |
|
"grad_norm": 2.117372989654541, |
|
"learning_rate": 3.7000248332410304e-05, |
|
"loss": 0.8887, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.003615807192398829, |
|
"grad_norm": 2.1954541206359863, |
|
"learning_rate": 3.664785968292036e-05, |
|
"loss": 0.8735, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.003629100601194413, |
|
"grad_norm": 2.301987648010254, |
|
"learning_rate": 3.629618374168711e-05, |
|
"loss": 0.9798, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.003642394009989997, |
|
"grad_norm": 2.793152332305908, |
|
"learning_rate": 3.594523928040859e-05, |
|
"loss": 1.0083, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.0036556874187855808, |
|
"grad_norm": 2.311340570449829, |
|
"learning_rate": 3.5595045031738125e-05, |
|
"loss": 0.8128, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.0036689808275811647, |
|
"grad_norm": 2.1849329471588135, |
|
"learning_rate": 3.5245619688284274e-05, |
|
"loss": 0.7448, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.0036822742363767486, |
|
"grad_norm": 2.5570812225341797, |
|
"learning_rate": 3.4896981901613104e-05, |
|
"loss": 0.8764, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.0036955676451723325, |
|
"grad_norm": 2.5506277084350586, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.8251, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.0037088610539679164, |
|
"grad_norm": 2.7928378582000732, |
|
"learning_rate": 3.420214339369944e-05, |
|
"loss": 0.8239, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.0037221544627635004, |
|
"grad_norm": 3.0748653411865234, |
|
"learning_rate": 3.38559797614277e-05, |
|
"loss": 0.9272, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0037354478715590843, |
|
"grad_norm": 2.7534868717193604, |
|
"learning_rate": 3.351067786190038e-05, |
|
"loss": 0.8112, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.003748741280354668, |
|
"grad_norm": 2.5092287063598633, |
|
"learning_rate": 3.316625612658315e-05, |
|
"loss": 0.8439, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.003762034689150252, |
|
"grad_norm": 2.871694326400757, |
|
"learning_rate": 3.282273293996027e-05, |
|
"loss": 0.8854, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.003775328097945836, |
|
"grad_norm": 3.3398563861846924, |
|
"learning_rate": 3.248012663855353e-05, |
|
"loss": 0.8582, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.00378862150674142, |
|
"grad_norm": 3.9510514736175537, |
|
"learning_rate": 3.2138455509943366e-05, |
|
"loss": 0.8925, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.003801914915537004, |
|
"grad_norm": 3.2462363243103027, |
|
"learning_rate": 3.179773779179267e-05, |
|
"loss": 0.8432, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.0038152083243325878, |
|
"grad_norm": 3.1924219131469727, |
|
"learning_rate": 3.145799167087342e-05, |
|
"loss": 0.7827, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.0038285017331281717, |
|
"grad_norm": 3.794776678085327, |
|
"learning_rate": 3.111923528209577e-05, |
|
"loss": 0.8214, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.0038417951419237556, |
|
"grad_norm": 3.1330740451812744, |
|
"learning_rate": 3.078148670754022e-05, |
|
"loss": 0.8446, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.0038550885507193395, |
|
"grad_norm": 3.1188437938690186, |
|
"learning_rate": 3.0444763975492208e-05, |
|
"loss": 0.8169, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0038683819595149234, |
|
"grad_norm": 3.640385866165161, |
|
"learning_rate": 3.0109085059480017e-05, |
|
"loss": 0.9897, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.0038816753683105073, |
|
"grad_norm": 4.044271469116211, |
|
"learning_rate": 2.977446787731532e-05, |
|
"loss": 0.8255, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.0038949687771060912, |
|
"grad_norm": 3.6685760021209717, |
|
"learning_rate": 2.944093029013664e-05, |
|
"loss": 0.8258, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.003908262185901675, |
|
"grad_norm": 3.5113165378570557, |
|
"learning_rate": 2.910849010145617e-05, |
|
"loss": 0.7465, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.0039215555946972595, |
|
"grad_norm": 3.854799747467041, |
|
"learning_rate": 2.8777165056209256e-05, |
|
"loss": 0.6862, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.003934849003492843, |
|
"grad_norm": 4.6844587326049805, |
|
"learning_rate": 2.8446972839807384e-05, |
|
"loss": 0.7543, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.003948142412288427, |
|
"grad_norm": 3.536104440689087, |
|
"learning_rate": 2.8117931077194065e-05, |
|
"loss": 0.6841, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.003961435821084011, |
|
"grad_norm": 4.2677903175354, |
|
"learning_rate": 2.7790057331904117e-05, |
|
"loss": 0.6677, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.003974729229879595, |
|
"grad_norm": 4.6824469566345215, |
|
"learning_rate": 2.746336910512606e-05, |
|
"loss": 0.8091, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.003988022638675179, |
|
"grad_norm": 6.547423839569092, |
|
"learning_rate": 2.7137883834768073e-05, |
|
"loss": 0.9708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.003988022638675179, |
|
"eval_loss": 0.850064754486084, |
|
"eval_runtime": 3480.1436, |
|
"eval_samples_per_second": 36.405, |
|
"eval_steps_per_second": 9.101, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.004001316047470763, |
|
"grad_norm": 1.0784815549850464, |
|
"learning_rate": 2.6813618894527138e-05, |
|
"loss": 0.9837, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.0040146094562663465, |
|
"grad_norm": 1.8547368049621582, |
|
"learning_rate": 2.6490591592961578e-05, |
|
"loss": 0.9933, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.004027902865061931, |
|
"grad_norm": 1.9853336811065674, |
|
"learning_rate": 2.6168819172567392e-05, |
|
"loss": 1.0929, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.004041196273857514, |
|
"grad_norm": 1.8064887523651123, |
|
"learning_rate": 2.5848318808857606e-05, |
|
"loss": 0.9201, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.004054489682653099, |
|
"grad_norm": 1.9184132814407349, |
|
"learning_rate": 2.5529107609445733e-05, |
|
"loss": 0.7944, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.004067783091448682, |
|
"grad_norm": 1.625139832496643, |
|
"learning_rate": 2.521120261313241e-05, |
|
"loss": 1.0146, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.0040810765002442665, |
|
"grad_norm": 2.2314794063568115, |
|
"learning_rate": 2.4894620788996037e-05, |
|
"loss": 0.9506, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.00409436990903985, |
|
"grad_norm": 2.091251850128174, |
|
"learning_rate": 2.457937903548695e-05, |
|
"loss": 1.1053, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.004107663317835434, |
|
"grad_norm": 2.8359029293060303, |
|
"learning_rate": 2.426549417952542e-05, |
|
"loss": 0.9342, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.004120956726631018, |
|
"grad_norm": 2.370006799697876, |
|
"learning_rate": 2.3952982975603496e-05, |
|
"loss": 0.8265, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.004134250135426602, |
|
"grad_norm": 1.7976568937301636, |
|
"learning_rate": 2.3641862104890595e-05, |
|
"loss": 1.0254, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.004147543544222186, |
|
"grad_norm": 2.030775547027588, |
|
"learning_rate": 2.3332148174343254e-05, |
|
"loss": 0.915, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.00416083695301777, |
|
"grad_norm": 2.1562678813934326, |
|
"learning_rate": 2.3023857715818532e-05, |
|
"loss": 0.9138, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.004174130361813354, |
|
"grad_norm": 2.4217755794525146, |
|
"learning_rate": 2.2717007185191674e-05, |
|
"loss": 0.9814, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.004187423770608938, |
|
"grad_norm": 2.0525031089782715, |
|
"learning_rate": 2.24116129614777e-05, |
|
"loss": 1.0232, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.004200717179404522, |
|
"grad_norm": 1.8559058904647827, |
|
"learning_rate": 2.2107691345957133e-05, |
|
"loss": 0.8898, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.004214010588200106, |
|
"grad_norm": 1.8364156484603882, |
|
"learning_rate": 2.1805258561305862e-05, |
|
"loss": 0.9021, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.00422730399699569, |
|
"grad_norm": 2.247051954269409, |
|
"learning_rate": 2.1504330750729186e-05, |
|
"loss": 0.8632, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.0042405974057912734, |
|
"grad_norm": 2.744231700897217, |
|
"learning_rate": 2.120492397710022e-05, |
|
"loss": 0.9776, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.004253890814586858, |
|
"grad_norm": 2.21708345413208, |
|
"learning_rate": 2.090705422210237e-05, |
|
"loss": 0.8523, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.004267184223382441, |
|
"grad_norm": 2.1800765991210938, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.9291, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.004280477632178026, |
|
"grad_norm": 2.510124444961548, |
|
"learning_rate": 2.0315989283671473e-05, |
|
"loss": 0.8304, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.004293771040973609, |
|
"grad_norm": 2.725311517715454, |
|
"learning_rate": 2.0022825650001387e-05, |
|
"loss": 0.77, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.0043070644497691934, |
|
"grad_norm": 2.490511894226074, |
|
"learning_rate": 1.9731262132804274e-05, |
|
"loss": 0.8455, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.004320357858564777, |
|
"grad_norm": 2.235755443572998, |
|
"learning_rate": 1.9441314295107537e-05, |
|
"loss": 0.7679, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.004333651267360361, |
|
"grad_norm": 2.30128812789917, |
|
"learning_rate": 1.9152997613697183e-05, |
|
"loss": 0.7796, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.004346944676155945, |
|
"grad_norm": 2.4752769470214844, |
|
"learning_rate": 1.8866327478291546e-05, |
|
"loss": 0.8884, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.004360238084951529, |
|
"grad_norm": 2.7887279987335205, |
|
"learning_rate": 1.8581319190720035e-05, |
|
"loss": 0.7255, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.004373531493747113, |
|
"grad_norm": 2.6239068508148193, |
|
"learning_rate": 1.8297987964106115e-05, |
|
"loss": 0.8943, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.004386824902542697, |
|
"grad_norm": 2.581753969192505, |
|
"learning_rate": 1.801634892205545e-05, |
|
"loss": 0.7536, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.00440011831133828, |
|
"grad_norm": 2.731560468673706, |
|
"learning_rate": 1.7736417097848506e-05, |
|
"loss": 0.8233, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.004413411720133865, |
|
"grad_norm": 2.825575351715088, |
|
"learning_rate": 1.7458207433638223e-05, |
|
"loss": 0.8542, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.004426705128929448, |
|
"grad_norm": 3.0620334148406982, |
|
"learning_rate": 1.718173477965236e-05, |
|
"loss": 0.8842, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.004439998537725033, |
|
"grad_norm": 3.0803141593933105, |
|
"learning_rate": 1.6907013893400837e-05, |
|
"loss": 0.9173, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.004453291946520616, |
|
"grad_norm": 2.6169607639312744, |
|
"learning_rate": 1.6634059438888033e-05, |
|
"loss": 0.9778, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.0044665853553162, |
|
"grad_norm": 3.0732738971710205, |
|
"learning_rate": 1.636288598583e-05, |
|
"loss": 0.6455, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.004479878764111784, |
|
"grad_norm": 2.649909496307373, |
|
"learning_rate": 1.6093508008876857e-05, |
|
"loss": 0.7877, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.004493172172907368, |
|
"grad_norm": 3.0148892402648926, |
|
"learning_rate": 1.5825939886840037e-05, |
|
"loss": 0.8035, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.004506465581702952, |
|
"grad_norm": 3.5838184356689453, |
|
"learning_rate": 1.5560195901924894e-05, |
|
"loss": 0.7526, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.004519758990498536, |
|
"grad_norm": 3.0014920234680176, |
|
"learning_rate": 1.5296290238968303e-05, |
|
"loss": 0.8166, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.00453305239929412, |
|
"grad_norm": 3.3715362548828125, |
|
"learning_rate": 1.50342369846815e-05, |
|
"loss": 0.8953, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.004546345808089704, |
|
"grad_norm": 2.9526143074035645, |
|
"learning_rate": 1.4774050126898164e-05, |
|
"loss": 0.7074, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.004559639216885288, |
|
"grad_norm": 3.7083990573883057, |
|
"learning_rate": 1.451574355382776e-05, |
|
"loss": 0.8644, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.004572932625680872, |
|
"grad_norm": 4.097346782684326, |
|
"learning_rate": 1.425933105331429e-05, |
|
"loss": 0.787, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.004586226034476456, |
|
"grad_norm": 3.9541990756988525, |
|
"learning_rate": 1.4004826312100216e-05, |
|
"loss": 0.9168, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.0045995194432720396, |
|
"grad_norm": 4.238726615905762, |
|
"learning_rate": 1.3752242915095992e-05, |
|
"loss": 0.8646, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.004612812852067624, |
|
"grad_norm": 4.998000621795654, |
|
"learning_rate": 1.3501594344654884e-05, |
|
"loss": 0.8752, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.004626106260863207, |
|
"grad_norm": 4.411581516265869, |
|
"learning_rate": 1.3252893979853304e-05, |
|
"loss": 1.0685, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.004639399669658792, |
|
"grad_norm": 5.595761299133301, |
|
"learning_rate": 1.3006155095776707e-05, |
|
"loss": 0.6555, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.004652693078454375, |
|
"grad_norm": 6.310719013214111, |
|
"learning_rate": 1.2761390862810907e-05, |
|
"loss": 0.902, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.00466598648724996, |
|
"grad_norm": 1.1895419359207153, |
|
"learning_rate": 1.2518614345939212e-05, |
|
"loss": 1.0112, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.004679279896045543, |
|
"grad_norm": 1.4201432466506958, |
|
"learning_rate": 1.227783850404487e-05, |
|
"loss": 0.9783, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.004692573304841127, |
|
"grad_norm": 1.685895323753357, |
|
"learning_rate": 1.2039076189219517e-05, |
|
"loss": 1.0757, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.004705866713636711, |
|
"grad_norm": 1.7650642395019531, |
|
"learning_rate": 1.1802340146077045e-05, |
|
"loss": 0.8879, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.004719160122432295, |
|
"grad_norm": 1.7274045944213867, |
|
"learning_rate": 1.1567643011073392e-05, |
|
"loss": 0.8959, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.004732453531227879, |
|
"grad_norm": 2.2640771865844727, |
|
"learning_rate": 1.1334997311832002e-05, |
|
"loss": 0.8883, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.004745746940023463, |
|
"grad_norm": 1.8365882635116577, |
|
"learning_rate": 1.1104415466475087e-05, |
|
"loss": 0.9858, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.0047590403488190465, |
|
"grad_norm": 2.0218539237976074, |
|
"learning_rate": 1.0875909782960886e-05, |
|
"loss": 0.9811, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.004772333757614631, |
|
"grad_norm": 1.7274595499038696, |
|
"learning_rate": 1.0649492458426564e-05, |
|
"loss": 1.0713, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.004785627166410214, |
|
"grad_norm": 1.639711856842041, |
|
"learning_rate": 1.0425175578537299e-05, |
|
"loss": 0.9316, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.004798920575205799, |
|
"grad_norm": 1.939886450767517, |
|
"learning_rate": 1.020297111684101e-05, |
|
"loss": 0.8692, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.004812213984001382, |
|
"grad_norm": 2.1711244583129883, |
|
"learning_rate": 9.98289093412938e-06, |
|
"loss": 0.867, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.0048255073927969665, |
|
"grad_norm": 1.8200219869613647, |
|
"learning_rate": 9.764946777804646e-06, |
|
"loss": 0.8237, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.00483880080159255, |
|
"grad_norm": 2.027531623840332, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.8075, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.004852094210388134, |
|
"grad_norm": 2.07029128074646, |
|
"learning_rate": 9.335512963221732e-06, |
|
"loss": 0.8874, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.004865387619183718, |
|
"grad_norm": 2.0369820594787598, |
|
"learning_rate": 9.124046227208082e-06, |
|
"loss": 0.8996, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.004878681027979302, |
|
"grad_norm": 2.115910530090332, |
|
"learning_rate": 8.914761360846869e-06, |
|
"loss": 0.8661, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.004891974436774886, |
|
"grad_norm": 2.021585702896118, |
|
"learning_rate": 8.707669535309793e-06, |
|
"loss": 0.8805, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.00490526784557047, |
|
"grad_norm": 2.294255018234253, |
|
"learning_rate": 8.502781804708826e-06, |
|
"loss": 0.8364, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.004918561254366054, |
|
"grad_norm": 2.2873215675354004, |
|
"learning_rate": 8.30010910550611e-06, |
|
"loss": 0.8194, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.004931854663161638, |
|
"grad_norm": 2.252678871154785, |
|
"learning_rate": 8.09966225593024e-06, |
|
"loss": 0.9445, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.004945148071957222, |
|
"grad_norm": 2.10272216796875, |
|
"learning_rate": 7.901451955398792e-06, |
|
"loss": 0.8814, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.004958441480752806, |
|
"grad_norm": 2.401695489883423, |
|
"learning_rate": 7.705488783947202e-06, |
|
"loss": 0.8314, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.00497173488954839, |
|
"grad_norm": 2.3294260501861572, |
|
"learning_rate": 7.511783201664052e-06, |
|
"loss": 0.8886, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.0049850282983439735, |
|
"grad_norm": 2.6172540187835693, |
|
"learning_rate": 7.320345548132679e-06, |
|
"loss": 0.9066, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.004998321707139558, |
|
"grad_norm": 2.2096974849700928, |
|
"learning_rate": 7.131186041879357e-06, |
|
"loss": 0.7404, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.005011615115935141, |
|
"grad_norm": 2.5697643756866455, |
|
"learning_rate": 6.944314779827749e-06, |
|
"loss": 0.8426, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.005024908524730726, |
|
"grad_norm": 2.6028385162353516, |
|
"learning_rate": 6.759741736760061e-06, |
|
"loss": 0.9637, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.005038201933526309, |
|
"grad_norm": 2.43548321723938, |
|
"learning_rate": 6.577476764784546e-06, |
|
"loss": 0.9475, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.0050514953423218935, |
|
"grad_norm": 2.482872486114502, |
|
"learning_rate": 6.397529592809614e-06, |
|
"loss": 0.8607, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.005064788751117477, |
|
"grad_norm": 2.3237485885620117, |
|
"learning_rate": 6.219909826024589e-06, |
|
"loss": 0.8284, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.005078082159913061, |
|
"grad_norm": 2.8761892318725586, |
|
"learning_rate": 6.0446269453868945e-06, |
|
"loss": 0.7335, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.005091375568708645, |
|
"grad_norm": 3.1307175159454346, |
|
"learning_rate": 5.871690307116107e-06, |
|
"loss": 0.9296, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.005104668977504229, |
|
"grad_norm": 3.0153515338897705, |
|
"learning_rate": 5.701109142194422e-06, |
|
"loss": 0.7582, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.005117962386299813, |
|
"grad_norm": 2.8852033615112305, |
|
"learning_rate": 5.532892555874059e-06, |
|
"loss": 0.8387, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.005131255795095397, |
|
"grad_norm": 2.806442975997925, |
|
"learning_rate": 5.3670495271910925e-06, |
|
"loss": 0.6859, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.0051445492038909805, |
|
"grad_norm": 2.6794891357421875, |
|
"learning_rate": 5.203588908486279e-06, |
|
"loss": 0.8166, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.005157842612686565, |
|
"grad_norm": 2.7952828407287598, |
|
"learning_rate": 5.042519424932513e-06, |
|
"loss": 0.7706, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.005171136021482148, |
|
"grad_norm": 2.773052930831909, |
|
"learning_rate": 4.883849674069058e-06, |
|
"loss": 0.8396, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.005184429430277733, |
|
"grad_norm": 2.9820058345794678, |
|
"learning_rate": 4.727588125342669e-06, |
|
"loss": 0.7257, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.005197722839073316, |
|
"grad_norm": 2.9437365531921387, |
|
"learning_rate": 4.573743119655516e-06, |
|
"loss": 0.7987, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.0052110162478689005, |
|
"grad_norm": 3.896148920059204, |
|
"learning_rate": 4.422322868919937e-06, |
|
"loss": 0.7962, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.005224309656664484, |
|
"grad_norm": 3.0379815101623535, |
|
"learning_rate": 4.273335455620097e-06, |
|
"loss": 0.6742, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.005237603065460068, |
|
"grad_norm": 3.5610365867614746, |
|
"learning_rate": 4.126788832380629e-06, |
|
"loss": 0.8542, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.005250896474255652, |
|
"grad_norm": 3.813161849975586, |
|
"learning_rate": 3.982690821542035e-06, |
|
"loss": 0.7634, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.005264189883051236, |
|
"grad_norm": 4.287951946258545, |
|
"learning_rate": 3.8410491147432395e-06, |
|
"loss": 0.8481, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.00527748329184682, |
|
"grad_norm": 3.525927782058716, |
|
"learning_rate": 3.7018712725109926e-06, |
|
"loss": 0.6304, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.005290776700642404, |
|
"grad_norm": 4.333404541015625, |
|
"learning_rate": 3.5651647238562904e-06, |
|
"loss": 0.8299, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.005304070109437988, |
|
"grad_norm": 5.827525615692139, |
|
"learning_rate": 3.430936765877857e-06, |
|
"loss": 0.9442, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.005317363518233572, |
|
"grad_norm": 7.1007280349731445, |
|
"learning_rate": 3.299194563372604e-06, |
|
"loss": 0.6819, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.005330656927029156, |
|
"grad_norm": 1.2109785079956055, |
|
"learning_rate": 3.1699451484532463e-06, |
|
"loss": 1.0173, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.00534395033582474, |
|
"grad_norm": 1.6271941661834717, |
|
"learning_rate": 3.0431954201728784e-06, |
|
"loss": 0.9225, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.005357243744620324, |
|
"grad_norm": 1.5182850360870361, |
|
"learning_rate": 2.9189521441567726e-06, |
|
"loss": 0.8122, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.0053705371534159075, |
|
"grad_norm": 1.6241300106048584, |
|
"learning_rate": 2.797221952241219e-06, |
|
"loss": 0.6472, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.005383830562211492, |
|
"grad_norm": 2.0764918327331543, |
|
"learning_rate": 2.6780113421195298e-06, |
|
"loss": 0.806, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.005397123971007075, |
|
"grad_norm": 1.7580714225769043, |
|
"learning_rate": 2.561326676995218e-06, |
|
"loss": 0.7895, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.00541041737980266, |
|
"grad_norm": 1.7050628662109375, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.9054, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.005423710788598243, |
|
"grad_norm": 1.7846729755401611, |
|
"learning_rate": 2.3355599600729915e-06, |
|
"loss": 0.8994, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.0054370041973938275, |
|
"grad_norm": 2.0640039443969727, |
|
"learning_rate": 2.2264899592121744e-06, |
|
"loss": 0.9436, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.005450297606189411, |
|
"grad_norm": 1.7736494541168213, |
|
"learning_rate": 2.1199700045797077e-06, |
|
"loss": 0.8843, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.005463591014984995, |
|
"grad_norm": 1.889944076538086, |
|
"learning_rate": 2.0160057819794466e-06, |
|
"loss": 0.8959, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.005476884423780579, |
|
"grad_norm": 2.189314603805542, |
|
"learning_rate": 1.9146028407958484e-06, |
|
"loss": 1.0046, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.005490177832576163, |
|
"grad_norm": 1.8977439403533936, |
|
"learning_rate": 1.8157665936977263e-06, |
|
"loss": 0.8848, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.005503471241371747, |
|
"grad_norm": 2.0931496620178223, |
|
"learning_rate": 1.7195023163493252e-06, |
|
"loss": 1.0084, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.005516764650167331, |
|
"grad_norm": 2.239429235458374, |
|
"learning_rate": 1.6258151471287396e-06, |
|
"loss": 0.8895, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.0055300580589629144, |
|
"grad_norm": 2.0651838779449463, |
|
"learning_rate": 1.5347100868536246e-06, |
|
"loss": 0.8107, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.005543351467758499, |
|
"grad_norm": 2.068511724472046, |
|
"learning_rate": 1.4461919985142735e-06, |
|
"loss": 0.7925, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.005556644876554082, |
|
"grad_norm": 2.0597782135009766, |
|
"learning_rate": 1.3602656070140275e-06, |
|
"loss": 0.827, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.005569938285349667, |
|
"grad_norm": 2.230896234512329, |
|
"learning_rate": 1.27693549891707e-06, |
|
"loss": 0.7999, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.00558323169414525, |
|
"grad_norm": 2.3435306549072266, |
|
"learning_rate": 1.196206122203647e-06, |
|
"loss": 0.9503, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.0055965251029408344, |
|
"grad_norm": 2.1370625495910645, |
|
"learning_rate": 1.1180817860325599e-06, |
|
"loss": 0.862, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.005609818511736418, |
|
"grad_norm": 2.205033540725708, |
|
"learning_rate": 1.0425666605112517e-06, |
|
"loss": 0.8852, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.005623111920532002, |
|
"grad_norm": 2.5713577270507812, |
|
"learning_rate": 9.696647764731337e-07, |
|
"loss": 0.8009, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.005636405329327586, |
|
"grad_norm": 3.0529072284698486, |
|
"learning_rate": 8.993800252624862e-07, |
|
"loss": 0.9193, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.00564969873812317, |
|
"grad_norm": 2.8869714736938477, |
|
"learning_rate": 8.317161585266964e-07, |
|
"loss": 0.9436, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.005662992146918754, |
|
"grad_norm": 2.595672845840454, |
|
"learning_rate": 7.666767880160464e-07, |
|
"loss": 0.8529, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.005676285555714338, |
|
"grad_norm": 2.3774523735046387, |
|
"learning_rate": 7.042653853909064e-07, |
|
"loss": 0.9693, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.005689578964509922, |
|
"grad_norm": 2.5005154609680176, |
|
"learning_rate": 6.444852820364222e-07, |
|
"loss": 0.7297, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.005702872373305506, |
|
"grad_norm": 2.437258243560791, |
|
"learning_rate": 5.87339668884701e-07, |
|
"loss": 0.7768, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.00571616578210109, |
|
"grad_norm": 2.3614542484283447, |
|
"learning_rate": 5.328315962444874e-07, |
|
"loss": 0.7939, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.005729459190896674, |
|
"grad_norm": 2.5789074897766113, |
|
"learning_rate": 4.809639736383431e-07, |
|
"loss": 0.7969, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.005742752599692258, |
|
"grad_norm": 3.0158462524414062, |
|
"learning_rate": 4.317395696473214e-07, |
|
"loss": 0.9557, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.005756046008487841, |
|
"grad_norm": 2.9326164722442627, |
|
"learning_rate": 3.851610117632354e-07, |
|
"loss": 0.877, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.005769339417283426, |
|
"grad_norm": 3.167335033416748, |
|
"learning_rate": 3.4123078624834216e-07, |
|
"loss": 0.7724, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.005782632826079009, |
|
"grad_norm": 2.828343152999878, |
|
"learning_rate": 2.9995123800270476e-07, |
|
"loss": 0.7906, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.005795926234874594, |
|
"grad_norm": 2.650979518890381, |
|
"learning_rate": 2.613245704389644e-07, |
|
"loss": 0.8052, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.005809219643670177, |
|
"grad_norm": 2.7367007732391357, |
|
"learning_rate": 2.2535284536476242e-07, |
|
"loss": 0.7699, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.005822513052465761, |
|
"grad_norm": 2.9800891876220703, |
|
"learning_rate": 1.920379828726726e-07, |
|
"loss": 0.6601, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.005835806461261345, |
|
"grad_norm": 3.0351688861846924, |
|
"learning_rate": 1.6138176123770554e-07, |
|
"loss": 0.692, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.005849099870056929, |
|
"grad_norm": 3.4861412048339844, |
|
"learning_rate": 1.333858168224178e-07, |
|
"loss": 0.7502, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.005862393278852513, |
|
"grad_norm": 3.223451614379883, |
|
"learning_rate": 1.0805164398952072e-07, |
|
"loss": 0.7937, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.005875686687648097, |
|
"grad_norm": 3.0218589305877686, |
|
"learning_rate": 8.53805950221498e-08, |
|
"loss": 0.8031, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.0058889800964436806, |
|
"grad_norm": 3.249140501022339, |
|
"learning_rate": 6.537388005167233e-08, |
|
"loss": 0.6787, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.005902273505239265, |
|
"grad_norm": 3.6842355728149414, |
|
"learning_rate": 4.8032566993089225e-08, |
|
"loss": 0.839, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.005915566914034848, |
|
"grad_norm": 3.5570058822631836, |
|
"learning_rate": 3.3357581488030475e-08, |
|
"loss": 0.6737, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.005928860322830433, |
|
"grad_norm": 3.5006725788116455, |
|
"learning_rate": 2.134970685536697e-08, |
|
"loss": 0.7928, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.005942153731626016, |
|
"grad_norm": 4.718527317047119, |
|
"learning_rate": 1.200958404936059e-08, |
|
"loss": 0.8572, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.0059554471404216006, |
|
"grad_norm": 4.4138689041137695, |
|
"learning_rate": 5.337711625497121e-09, |
|
"loss": 0.9534, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.005968740549217184, |
|
"grad_norm": 5.295405387878418, |
|
"learning_rate": 1.3344457138297906e-09, |
|
"loss": 0.7238, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.005982033958012768, |
|
"grad_norm": 7.403181552886963, |
|
"learning_rate": 0.0, |
|
"loss": 0.9382, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.005982033958012768, |
|
"eval_loss": 0.8241426348686218, |
|
"eval_runtime": 3479.6362, |
|
"eval_samples_per_second": 36.411, |
|
"eval_steps_per_second": 9.103, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 150, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4139709881266995e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|