|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2282,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008764241893076249,
      "grad_norm": 3.7227299213409424,
      "learning_rate": 9.999654593547529e-05,
      "loss": 0.8928,
      "step": 10
    },
    {
      "epoch": 0.017528483786152498,
      "grad_norm": 2.602724552154541,
      "learning_rate": 9.998460664009619e-05,
      "loss": 0.7339,
      "step": 20
    },
    {
      "epoch": 0.026292725679228746,
      "grad_norm": 2.0315356254577637,
      "learning_rate": 9.996414173332143e-05,
      "loss": 0.8093,
      "step": 30
    },
    {
      "epoch": 0.035056967572304996,
      "grad_norm": 2.265153408050537,
      "learning_rate": 9.993515509371871e-05,
      "loss": 0.7516,
      "step": 40
    },
    {
      "epoch": 0.04382120946538125,
      "grad_norm": 1.9205143451690674,
      "learning_rate": 9.989765221491895e-05,
      "loss": 0.7691,
      "step": 50
    },
    {
      "epoch": 0.05258545135845749,
      "grad_norm": 2.219170331954956,
      "learning_rate": 9.985164020457504e-05,
      "loss": 0.7539,
      "step": 60
    },
    {
      "epoch": 0.06134969325153374,
      "grad_norm": 2.3345134258270264,
      "learning_rate": 9.979712778301489e-05,
      "loss": 0.6932,
      "step": 70
    },
    {
      "epoch": 0.07011393514460999,
      "grad_norm": 1.9078369140625,
      "learning_rate": 9.973412528158862e-05,
      "loss": 0.7058,
      "step": 80
    },
    {
      "epoch": 0.07887817703768624,
      "grad_norm": 2.3402259349823,
      "learning_rate": 9.966264464071064e-05,
      "loss": 0.7181,
      "step": 90
    },
    {
      "epoch": 0.0876424189307625,
      "grad_norm": 1.6266059875488281,
      "learning_rate": 9.958269940759659e-05,
      "loss": 0.6715,
      "step": 100
    },
    {
      "epoch": 0.09640666082383874,
      "grad_norm": 1.8910496234893799,
      "learning_rate": 9.94943047336958e-05,
      "loss": 0.6983,
      "step": 110
    },
    {
      "epoch": 0.10517090271691498,
      "grad_norm": 1.970109224319458,
      "learning_rate": 9.939747737181993e-05,
      "loss": 0.7639,
      "step": 120
    },
    {
      "epoch": 0.11393514460999124,
      "grad_norm": 1.9310839176177979,
      "learning_rate": 9.929223567296766e-05,
      "loss": 0.6289,
      "step": 130
    },
    {
      "epoch": 0.12269938650306748,
      "grad_norm": 1.5292866230010986,
      "learning_rate": 9.917859958284699e-05,
      "loss": 0.6575,
      "step": 140
    },
    {
      "epoch": 0.13146362839614373,
      "grad_norm": 1.3497545719146729,
      "learning_rate": 9.905659063809492e-05,
      "loss": 0.6986,
      "step": 150
    },
    {
      "epoch": 0.14022787028921999,
      "grad_norm": 2.1464974880218506,
      "learning_rate": 9.892623196219586e-05,
      "loss": 0.7055,
      "step": 160
    },
    {
      "epoch": 0.14899211218229624,
      "grad_norm": 2.5107662677764893,
      "learning_rate": 9.878754826109915e-05,
      "loss": 0.7421,
      "step": 170
    },
    {
      "epoch": 0.15775635407537247,
      "grad_norm": 1.9968948364257812,
      "learning_rate": 9.864056581853674e-05,
      "loss": 0.6956,
      "step": 180
    },
    {
      "epoch": 0.16652059596844873,
      "grad_norm": 1.5380990505218506,
      "learning_rate": 9.84853124910418e-05,
      "loss": 0.7439,
      "step": 190
    },
    {
      "epoch": 0.175284837861525,
      "grad_norm": 1.8293559551239014,
      "learning_rate": 9.832181770266927e-05,
      "loss": 0.6924,
      "step": 200
    },
    {
      "epoch": 0.18404907975460122,
      "grad_norm": 1.5421850681304932,
      "learning_rate": 9.815011243941939e-05,
      "loss": 0.6443,
      "step": 210
    },
    {
      "epoch": 0.19281332164767748,
      "grad_norm": 1.7461329698562622,
      "learning_rate": 9.797022924336504e-05,
      "loss": 0.7049,
      "step": 220
    },
    {
      "epoch": 0.20157756354075373,
      "grad_norm": 1.9567017555236816,
      "learning_rate": 9.778220220648439e-05,
      "loss": 0.6797,
      "step": 230
    },
    {
      "epoch": 0.21034180543382996,
      "grad_norm": 2.222736358642578,
      "learning_rate": 9.75860669641996e-05,
      "loss": 0.6958,
      "step": 240
    },
    {
      "epoch": 0.21910604732690622,
      "grad_norm": 1.6702378988265991,
      "learning_rate": 9.738186068862311e-05,
      "loss": 0.6895,
      "step": 250
    },
    {
      "epoch": 0.22787028921998248,
      "grad_norm": 1.9190706014633179,
      "learning_rate": 9.716962208151269e-05,
      "loss": 0.6747,
      "step": 260
    },
    {
      "epoch": 0.2366345311130587,
      "grad_norm": 2.277719020843506,
      "learning_rate": 9.69493913669366e-05,
      "loss": 0.704,
      "step": 270
    },
    {
      "epoch": 0.24539877300613497,
      "grad_norm": 1.6245454549789429,
      "learning_rate": 9.672121028365014e-05,
      "loss": 0.6023,
      "step": 280
    },
    {
      "epoch": 0.2541630148992112,
      "grad_norm": 1.7889143228530884,
      "learning_rate": 9.648512207718532e-05,
      "loss": 0.7218,
      "step": 290
    },
    {
      "epoch": 0.26292725679228746,
      "grad_norm": 2.422360897064209,
      "learning_rate": 9.624117149165466e-05,
      "loss": 0.6986,
      "step": 300
    },
    {
      "epoch": 0.27169149868536374,
      "grad_norm": 2.14021372795105,
      "learning_rate": 9.598940476127131e-05,
      "loss": 0.6476,
      "step": 310
    },
    {
      "epoch": 0.28045574057843997,
      "grad_norm": 1.9130802154541016,
      "learning_rate": 9.57298696015866e-05,
      "loss": 0.7056,
      "step": 320
    },
    {
      "epoch": 0.2892199824715162,
      "grad_norm": 2.4466943740844727,
      "learning_rate": 9.546261520044675e-05,
      "loss": 0.6542,
      "step": 330
    },
    {
      "epoch": 0.2979842243645925,
      "grad_norm": 1.6736353635787964,
      "learning_rate": 9.518769220867076e-05,
      "loss": 0.6648,
      "step": 340
    },
    {
      "epoch": 0.3067484662576687,
      "grad_norm": 1.745496153831482,
      "learning_rate": 9.490515273045085e-05,
      "loss": 0.6411,
      "step": 350
    },
    {
      "epoch": 0.31551270815074495,
      "grad_norm": 2.82519268989563,
      "learning_rate": 9.461505031347753e-05,
      "loss": 0.6597,
      "step": 360
    },
    {
      "epoch": 0.32427695004382123,
      "grad_norm": 2.2791597843170166,
      "learning_rate": 9.431743993879119e-05,
      "loss": 0.6931,
      "step": 370
    },
    {
      "epoch": 0.33304119193689746,
      "grad_norm": 1.6820751428604126,
      "learning_rate": 9.401237801036176e-05,
      "loss": 0.6529,
      "step": 380
    },
    {
      "epoch": 0.3418054338299737,
      "grad_norm": 1.7358545064926147,
      "learning_rate": 9.369992234439899e-05,
      "loss": 0.7001,
      "step": 390
    },
    {
      "epoch": 0.35056967572305,
      "grad_norm": 1.7271596193313599,
      "learning_rate": 9.338013215839495e-05,
      "loss": 0.6801,
      "step": 400
    },
    {
      "epoch": 0.3593339176161262,
      "grad_norm": 1.7952642440795898,
      "learning_rate": 9.305306805990093e-05,
      "loss": 0.7023,
      "step": 410
    },
    {
      "epoch": 0.36809815950920244,
      "grad_norm": 2.275848865509033,
      "learning_rate": 9.271879203504094e-05,
      "loss": 0.6641,
      "step": 420
    },
    {
      "epoch": 0.3768624014022787,
      "grad_norm": 1.435059666633606,
      "learning_rate": 9.237736743676386e-05,
      "loss": 0.6739,
      "step": 430
    },
    {
      "epoch": 0.38562664329535495,
      "grad_norm": 1.6760456562042236,
      "learning_rate": 9.202885897283674e-05,
      "loss": 0.6245,
      "step": 440
    },
    {
      "epoch": 0.3943908851884312,
      "grad_norm": 1.5440309047698975,
      "learning_rate": 9.167333269358109e-05,
      "loss": 0.6436,
      "step": 450
    },
    {
      "epoch": 0.40315512708150747,
      "grad_norm": 1.798532485961914,
      "learning_rate": 9.131085597935487e-05,
      "loss": 0.6097,
      "step": 460
    },
    {
      "epoch": 0.4119193689745837,
      "grad_norm": 1.5643911361694336,
      "learning_rate": 9.094149752778233e-05,
      "loss": 0.7032,
      "step": 470
    },
    {
      "epoch": 0.42068361086765993,
      "grad_norm": 1.9498308897018433,
      "learning_rate": 9.056532734073434e-05,
      "loss": 0.6429,
      "step": 480
    },
    {
      "epoch": 0.4294478527607362,
      "grad_norm": 2.7362070083618164,
      "learning_rate": 9.018241671106134e-05,
      "loss": 0.6518,
      "step": 490
    },
    {
      "epoch": 0.43821209465381245,
      "grad_norm": 1.755315899848938,
      "learning_rate": 8.979283820908174e-05,
      "loss": 0.6145,
      "step": 500
    },
    {
      "epoch": 0.4469763365468887,
      "grad_norm": 1.6948930025100708,
      "learning_rate": 8.939666566882821e-05,
      "loss": 0.6809,
      "step": 510
    },
    {
      "epoch": 0.45574057843996496,
      "grad_norm": 1.9926345348358154,
      "learning_rate": 8.899397417405442e-05,
      "loss": 0.6811,
      "step": 520
    },
    {
      "epoch": 0.4645048203330412,
      "grad_norm": 2.0141072273254395,
      "learning_rate": 8.858484004400496e-05,
      "loss": 0.6326,
      "step": 530
    },
    {
      "epoch": 0.4732690622261174,
      "grad_norm": 1.5361511707305908,
      "learning_rate": 8.816934081895105e-05,
      "loss": 0.6316,
      "step": 540
    },
    {
      "epoch": 0.4820333041191937,
      "grad_norm": 2.0940563678741455,
      "learning_rate": 8.774755524549503e-05,
      "loss": 0.7468,
      "step": 550
    },
    {
      "epoch": 0.49079754601226994,
      "grad_norm": 2.6678926944732666,
      "learning_rate": 8.731956326164591e-05,
      "loss": 0.6853,
      "step": 560
    },
    {
      "epoch": 0.49956178790534617,
      "grad_norm": 2.2798871994018555,
      "learning_rate": 8.688544598166935e-05,
      "loss": 0.6488,
      "step": 570
    },
    {
      "epoch": 0.5083260297984225,
      "grad_norm": 2.1741902828216553,
      "learning_rate": 8.644528568071472e-05,
      "loss": 0.7044,
      "step": 580
    },
    {
      "epoch": 0.5170902716914987,
      "grad_norm": 1.9935109615325928,
      "learning_rate": 8.599916577922198e-05,
      "loss": 0.6689,
      "step": 590
    },
    {
      "epoch": 0.5258545135845749,
      "grad_norm": 2.2509982585906982,
      "learning_rate": 8.554717082711164e-05,
      "loss": 0.6697,
      "step": 600
    },
    {
      "epoch": 0.5346187554776511,
      "grad_norm": 1.5349043607711792,
      "learning_rate": 8.508938648776062e-05,
      "loss": 0.6741,
      "step": 610
    },
    {
      "epoch": 0.5433829973707275,
      "grad_norm": 1.8080490827560425,
      "learning_rate": 8.462589952176709e-05,
      "loss": 0.6448,
      "step": 620
    },
    {
      "epoch": 0.5521472392638037,
      "grad_norm": 1.4415456056594849,
      "learning_rate": 8.415679777050735e-05,
      "loss": 0.6585,
      "step": 630
    },
    {
      "epoch": 0.5609114811568799,
      "grad_norm": 1.9184411764144897,
      "learning_rate": 8.368217013948786e-05,
      "loss": 0.6934,
      "step": 640
    },
    {
      "epoch": 0.5696757230499562,
      "grad_norm": 2.0871620178222656,
      "learning_rate": 8.320210658149562e-05,
      "loss": 0.642,
      "step": 650
    },
    {
      "epoch": 0.5784399649430324,
      "grad_norm": 1.835964322090149,
      "learning_rate": 8.271669807955007e-05,
      "loss": 0.7067,
      "step": 660
    },
    {
      "epoch": 0.5872042068361086,
      "grad_norm": 2.068668842315674,
      "learning_rate": 8.222603662965974e-05,
      "loss": 0.6759,
      "step": 670
    },
    {
      "epoch": 0.595968448729185,
      "grad_norm": 1.837086796760559,
      "learning_rate": 8.173021522338687e-05,
      "loss": 0.6301,
      "step": 680
    },
    {
      "epoch": 0.6047326906222612,
      "grad_norm": 1.995373010635376,
      "learning_rate": 8.122932783022342e-05,
      "loss": 0.688,
      "step": 690
    },
    {
      "epoch": 0.6134969325153374,
      "grad_norm": 1.736932635307312,
      "learning_rate": 8.072346937978168e-05,
      "loss": 0.6538,
      "step": 700
    },
    {
      "epoch": 0.6222611744084137,
      "grad_norm": 2.074141025543213,
      "learning_rate": 8.02127357438029e-05,
      "loss": 0.7275,
      "step": 710
    },
    {
      "epoch": 0.6310254163014899,
      "grad_norm": 2.758039951324463,
      "learning_rate": 7.969722371798753e-05,
      "loss": 0.7139,
      "step": 720
    },
    {
      "epoch": 0.6397896581945661,
      "grad_norm": 1.8415740728378296,
      "learning_rate": 7.917703100365005e-05,
      "loss": 0.5433,
      "step": 730
    },
    {
      "epoch": 0.6485539000876425,
      "grad_norm": 2.0133821964263916,
      "learning_rate": 7.865225618920248e-05,
      "loss": 0.6369,
      "step": 740
    },
    {
      "epoch": 0.6573181419807187,
      "grad_norm": 1.943287968635559,
      "learning_rate": 7.812299873146955e-05,
      "loss": 0.6168,
      "step": 750
    },
    {
      "epoch": 0.6660823838737949,
      "grad_norm": 1.7971194982528687,
      "learning_rate": 7.758935893683939e-05,
      "loss": 0.6518,
      "step": 760
    },
    {
      "epoch": 0.6748466257668712,
      "grad_norm": 1.5026414394378662,
      "learning_rate": 7.705143794225315e-05,
      "loss": 0.6498,
      "step": 770
    },
    {
      "epoch": 0.6836108676599474,
      "grad_norm": 1.8013416528701782,
      "learning_rate": 7.65093376960372e-05,
      "loss": 0.6598,
      "step": 780
    },
    {
      "epoch": 0.6923751095530236,
      "grad_norm": 1.8364579677581787,
      "learning_rate": 7.596316093858172e-05,
      "loss": 0.6934,
      "step": 790
    },
    {
      "epoch": 0.7011393514461,
      "grad_norm": 1.7535738945007324,
      "learning_rate": 7.541301118286894e-05,
      "loss": 0.5668,
      "step": 800
    },
    {
      "epoch": 0.7099035933391762,
      "grad_norm": 1.9401272535324097,
      "learning_rate": 7.485899269485506e-05,
      "loss": 0.6852,
      "step": 810
    },
    {
      "epoch": 0.7186678352322524,
      "grad_norm": 1.886604905128479,
      "learning_rate": 7.430121047370955e-05,
      "loss": 0.6845,
      "step": 820
    },
    {
      "epoch": 0.7274320771253286,
      "grad_norm": 1.9620131254196167,
      "learning_rate": 7.37397702319153e-05,
      "loss": 0.6344,
      "step": 830
    },
    {
      "epoch": 0.7361963190184049,
      "grad_norm": 1.949866771697998,
      "learning_rate": 7.32314343370074e-05,
      "loss": 0.6247,
      "step": 840
    },
    {
      "epoch": 0.7449605609114811,
      "grad_norm": 1.5609701871871948,
      "learning_rate": 7.266333756059938e-05,
      "loss": 0.604,
      "step": 850
    },
    {
      "epoch": 0.7537248028045574,
      "grad_norm": 2.5070557594299316,
      "learning_rate": 7.209189317790467e-05,
      "loss": 0.6262,
      "step": 860
    },
    {
      "epoch": 0.7624890446976337,
      "grad_norm": 1.672145128250122,
      "learning_rate": 7.151720949069814e-05,
      "loss": 0.6188,
      "step": 870
    },
    {
      "epoch": 0.7712532865907099,
      "grad_norm": 1.456264615058899,
      "learning_rate": 7.093939541467697e-05,
      "loss": 0.6026,
      "step": 880
    },
    {
      "epoch": 0.7800175284837861,
      "grad_norm": 1.967781901359558,
      "learning_rate": 7.035856045881851e-05,
      "loss": 0.6649,
      "step": 890
    },
    {
      "epoch": 0.7887817703768624,
      "grad_norm": 1.6206531524658203,
      "learning_rate": 6.977481470462593e-05,
      "loss": 0.6018,
      "step": 900
    },
    {
      "epoch": 0.7975460122699386,
      "grad_norm": 2.240525960922241,
      "learning_rate": 6.918826878526527e-05,
      "loss": 0.5912,
      "step": 910
    },
    {
      "epoch": 0.8063102541630149,
      "grad_norm": 1.8358842134475708,
      "learning_rate": 6.859903386459781e-05,
      "loss": 0.6435,
      "step": 920
    },
    {
      "epoch": 0.8150744960560912,
      "grad_norm": 2.574652671813965,
      "learning_rate": 6.80072216161121e-05,
      "loss": 0.6789,
      "step": 930
    },
    {
      "epoch": 0.8238387379491674,
      "grad_norm": 1.668445348739624,
      "learning_rate": 6.741294420175927e-05,
      "loss": 0.6103,
      "step": 940
    },
    {
      "epoch": 0.8326029798422436,
      "grad_norm": 1.7513461112976074,
      "learning_rate": 6.681631425069566e-05,
      "loss": 0.5696,
      "step": 950
    },
    {
      "epoch": 0.8413672217353199,
      "grad_norm": 2.508164405822754,
      "learning_rate": 6.621744483793715e-05,
      "loss": 0.653,
      "step": 960
    },
    {
      "epoch": 0.8501314636283961,
      "grad_norm": 1.9786324501037598,
      "learning_rate": 6.56164494629288e-05,
      "loss": 0.635,
      "step": 970
    },
    {
      "epoch": 0.8588957055214724,
      "grad_norm": 1.8781009912490845,
      "learning_rate": 6.501344202803414e-05,
      "loss": 0.7071,
      "step": 980
    },
    {
      "epoch": 0.8676599474145487,
      "grad_norm": 1.819648027420044,
      "learning_rate": 6.440853681694801e-05,
      "loss": 0.5952,
      "step": 990
    },
    {
      "epoch": 0.8764241893076249,
      "grad_norm": 1.8697113990783691,
      "learning_rate": 6.380184847303727e-05,
      "loss": 0.6179,
      "step": 1000
    },
    {
      "epoch": 0.8851884312007011,
      "grad_norm": 1.6153459548950195,
      "learning_rate": 6.319349197761317e-05,
      "loss": 0.6071,
      "step": 1010
    },
    {
      "epoch": 0.8939526730937774,
      "grad_norm": 1.5980494022369385,
      "learning_rate": 6.25835826281398e-05,
      "loss": 0.588,
      "step": 1020
    },
    {
      "epoch": 0.9027169149868537,
      "grad_norm": 1.509323239326477,
      "learning_rate": 6.197223601638266e-05,
      "loss": 0.6636,
      "step": 1030
    },
    {
      "epoch": 0.9114811568799299,
      "grad_norm": 1.91567862033844,
      "learning_rate": 6.135956800650128e-05,
      "loss": 0.6727,
      "step": 1040
    },
    {
      "epoch": 0.9202453987730062,
      "grad_norm": 1.7963509559631348,
      "learning_rate": 6.074569471309032e-05,
      "loss": 0.5768,
      "step": 1050
    },
    {
      "epoch": 0.9290096406660824,
      "grad_norm": 1.8217496871948242,
      "learning_rate": 6.013073247917326e-05,
      "loss": 0.6243,
      "step": 1060
    },
    {
      "epoch": 0.9377738825591586,
      "grad_norm": 1.648887276649475,
      "learning_rate": 5.951479785415266e-05,
      "loss": 0.6717,
      "step": 1070
    },
    {
      "epoch": 0.9465381244522348,
      "grad_norm": 1.668747067451477,
      "learning_rate": 5.889800757172146e-05,
      "loss": 0.5823,
      "step": 1080
    },
    {
      "epoch": 0.9553023663453112,
      "grad_norm": 2.224945545196533,
      "learning_rate": 5.8280478527739235e-05,
      "loss": 0.6203,
      "step": 1090
    },
    {
      "epoch": 0.9640666082383874,
      "grad_norm": NaN,
      "learning_rate": 5.7724167474641534e-05,
      "loss": 0.6271,
      "step": 1100
    },
    {
      "epoch": 0.9728308501314636,
      "grad_norm": 2.2267634868621826,
      "learning_rate": 5.7105557315385284e-05,
      "loss": 0.6614,
      "step": 1110
    },
    {
      "epoch": 0.9815950920245399,
      "grad_norm": 1.7963807582855225,
      "learning_rate": 5.6486548104880555e-05,
      "loss": 0.7113,
      "step": 1120
    },
    {
      "epoch": 0.9903593339176161,
      "grad_norm": 2.0616729259490967,
      "learning_rate": 5.586725715952452e-05,
      "loss": 0.6077,
      "step": 1130
    },
    {
      "epoch": 0.9991235758106923,
      "grad_norm": 1.7366951704025269,
      "learning_rate": 5.5247801849109526e-05,
      "loss": 0.634,
      "step": 1140
    },
    {
      "epoch": 1.0078878177037687,
      "grad_norm": 1.4753001928329468,
      "learning_rate": 5.462829957457888e-05,
      "loss": 0.44,
      "step": 1150
    },
    {
      "epoch": 1.016652059596845,
      "grad_norm": 1.493257999420166,
      "learning_rate": 5.400886774577667e-05,
      "loss": 0.3635,
      "step": 1160
    },
    {
      "epoch": 1.0254163014899211,
      "grad_norm": 1.7157448530197144,
      "learning_rate": 5.338962375919589e-05,
      "loss": 0.3525,
      "step": 1170
    },
    {
      "epoch": 1.0341805433829974,
      "grad_norm": 1.7190569639205933,
      "learning_rate": 5.277068497572914e-05,
      "loss": 0.3751,
      "step": 1180
    },
    {
      "epoch": 1.0429447852760736,
      "grad_norm": 1.5113599300384521,
      "learning_rate": 5.215216869842604e-05,
      "loss": 0.3857,
      "step": 1190
    },
    {
      "epoch": 1.0517090271691498,
      "grad_norm": 1.6809605360031128,
      "learning_rate": 5.1534192150261676e-05,
      "loss": 0.3611,
      "step": 1200
    },
    {
      "epoch": 1.060473269062226,
      "grad_norm": 1.6623985767364502,
      "learning_rate": 5.091687245192006e-05,
      "loss": 0.3558,
      "step": 1210
    },
    {
      "epoch": 1.0692375109553023,
      "grad_norm": 2.009783983230591,
      "learning_rate": 5.030032659959722e-05,
      "loss": 0.3654,
      "step": 1220
    },
    {
      "epoch": 1.0780017528483785,
      "grad_norm": 1.6117240190505981,
      "learning_rate": 4.968467144282759e-05,
      "loss": 0.3715,
      "step": 1230
    },
    {
      "epoch": 1.086765994741455,
      "grad_norm": 1.3283199071884155,
      "learning_rate": 4.9070023662338523e-05,
      "loss": 0.3736,
      "step": 1240
    },
    {
      "epoch": 1.0955302366345312,
      "grad_norm": 1.8791778087615967,
      "learning_rate": 4.8456499747936465e-05,
      "loss": 0.4082,
      "step": 1250
    },
    {
      "epoch": 1.1042944785276074,
      "grad_norm": 1.7714508771896362,
      "learning_rate": 4.7844215976429576e-05,
      "loss": 0.3743,
      "step": 1260
    },
    {
      "epoch": 1.1130587204206837,
      "grad_norm": 1.773903489112854,
      "learning_rate": 4.723328838959057e-05,
      "loss": 0.3858,
      "step": 1270
    },
    {
      "epoch": 1.1218229623137599,
      "grad_norm": 1.4871599674224854,
      "learning_rate": 4.662383277216418e-05,
      "loss": 0.3486,
      "step": 1280
    },
    {
      "epoch": 1.1305872042068361,
      "grad_norm": 1.3975831270217896,
      "learning_rate": 4.601596462992326e-05,
      "loss": 0.3657,
      "step": 1290
    },
    {
      "epoch": 1.1393514460999123,
      "grad_norm": 1.5370299816131592,
      "learning_rate": 4.540979916777783e-05,
      "loss": 0.3371,
      "step": 1300
    },
    {
      "epoch": 1.1481156879929886,
      "grad_norm": 1.5495011806488037,
      "learning_rate": 4.480545126794115e-05,
      "loss": 0.3357,
      "step": 1310
    },
    {
      "epoch": 1.1568799298860648,
      "grad_norm": 1.694589376449585,
      "learning_rate": 4.420303546815678e-05,
      "loss": 0.3645,
      "step": 1320
    },
    {
      "epoch": 1.165644171779141,
      "grad_norm": 1.985825777053833,
      "learning_rate": 4.360266593999124e-05,
      "loss": 0.3546,
      "step": 1330
    },
    {
      "epoch": 1.1744084136722173,
      "grad_norm": 1.5808460712432861,
      "learning_rate": 4.300445646719573e-05,
      "loss": 0.3638,
      "step": 1340
    },
    {
      "epoch": 1.1831726555652935,
      "grad_norm": 1.7759652137756348,
      "learning_rate": 4.240852042414162e-05,
      "loss": 0.4059,
      "step": 1350
    },
    {
      "epoch": 1.19193689745837,
      "grad_norm": 1.7563869953155518,
      "learning_rate": 4.181497075433334e-05,
      "loss": 0.3446,
      "step": 1360
    },
    {
      "epoch": 1.2007011393514462,
      "grad_norm": 1.9143909215927124,
      "learning_rate": 4.1223919949003045e-05,
      "loss": 0.3487,
      "step": 1370
    },
    {
      "epoch": 1.2094653812445224,
      "grad_norm": 1.8407511711120605,
      "learning_rate": 4.0635480025790926e-05,
      "loss": 0.3649,
      "step": 1380
    },
    {
      "epoch": 1.2182296231375986,
      "grad_norm": 1.7706880569458008,
      "learning_rate": 4.0049762507515355e-05,
      "loss": 0.3612,
      "step": 1390
    },
    {
      "epoch": 1.2269938650306749,
      "grad_norm": 1.0870561599731445,
      "learning_rate": 3.9466878401036686e-05,
      "loss": 0.3401,
      "step": 1400
    },
    {
      "epoch": 1.235758106923751,
      "grad_norm": 1.91828453540802,
      "learning_rate": 3.8886938176219024e-05,
      "loss": 0.3327,
      "step": 1410
    },
    {
      "epoch": 1.2445223488168273,
      "grad_norm": 1.61056649684906,
      "learning_rate": 3.8310051744993514e-05,
      "loss": 0.3386,
      "step": 1420
    },
    {
      "epoch": 1.2532865907099036,
      "grad_norm": 2.071869373321533,
      "learning_rate": 3.773632844052767e-05,
      "loss": 0.363,
      "step": 1430
    },
    {
      "epoch": 1.2620508326029798,
      "grad_norm": 1.671288251876831,
      "learning_rate": 3.7165876996504125e-05,
      "loss": 0.3828,
      "step": 1440
    },
    {
      "epoch": 1.270815074496056,
      "grad_norm": 1.8811005353927612,
      "learning_rate": 3.659880552651317e-05,
      "loss": 0.3551,
      "step": 1450
    },
    {
      "epoch": 1.2795793163891322,
      "grad_norm": 1.5208740234375,
      "learning_rate": 3.6035221503562775e-05,
      "loss": 0.3566,
      "step": 1460
    },
    {
      "epoch": 1.2883435582822087,
      "grad_norm": 1.7736235857009888,
      "learning_rate": 3.547523173970989e-05,
      "loss": 0.3629,
      "step": 1470
    },
    {
      "epoch": 1.2971078001752847,
      "grad_norm": 1.6049748659133911,
      "learning_rate": 3.491894236581728e-05,
      "loss": 0.3359,
      "step": 1480
    },
    {
      "epoch": 1.3058720420683612,
      "grad_norm": 1.4260120391845703,
      "learning_rate": 3.436645881143918e-05,
      "loss": 0.3758,
      "step": 1490
    },
    {
      "epoch": 1.3146362839614374,
      "grad_norm": 1.9971890449523926,
      "learning_rate": 3.3817885784839986e-05,
      "loss": 0.314,
      "step": 1500
    },
    {
      "epoch": 1.3234005258545136,
      "grad_norm": 1.7832164764404297,
      "learning_rate": 3.327332725314974e-05,
      "loss": 0.3901,
      "step": 1510
    },
    {
      "epoch": 1.3321647677475899,
      "grad_norm": 1.6533173322677612,
      "learning_rate": 3.273288642265985e-05,
      "loss": 0.3324,
      "step": 1520
    },
    {
      "epoch": 1.340929009640666,
      "grad_norm": 1.5945855379104614,
      "learning_rate": 3.2196665719263266e-05,
      "loss": 0.3435,
      "step": 1530
    },
    {
      "epoch": 1.3496932515337423,
      "grad_norm": 1.51680588722229,
      "learning_rate": 3.166476676904235e-05,
      "loss": 0.3714,
      "step": 1540
    },
    {
      "epoch": 1.3584574934268185,
      "grad_norm": 1.6684399843215942,
      "learning_rate": 3.113729037900843e-05,
      "loss": 0.333,
      "step": 1550
    },
    {
      "epoch": 1.3672217353198948,
      "grad_norm": 1.5105247497558594,
      "learning_rate": 3.0614336517996576e-05,
      "loss": 0.3615,
      "step": 1560
    },
    {
      "epoch": 1.375985977212971,
      "grad_norm": 2.113157272338867,
      "learning_rate": 3.0096004297719205e-05,
      "loss": 0.3002,
      "step": 1570
    },
    {
      "epoch": 1.3847502191060475,
      "grad_norm": 1.1173641681671143,
      "learning_rate": 2.958239195398217e-05,
      "loss": 0.3571,
      "step": 1580
    },
    {
      "epoch": 1.3935144609991235,
      "grad_norm": 1.4548070430755615,
      "learning_rate": 2.90735968280668e-05,
      "loss": 0.3252,
      "step": 1590
    },
    {
      "epoch": 1.4022787028922,
      "grad_norm": 1.7521406412124634,
      "learning_rate": 2.8569715348281547e-05,
      "loss": 0.3409,
      "step": 1600
    },
    {
      "epoch": 1.4110429447852761,
      "grad_norm": 1.7292704582214355,
      "learning_rate": 2.807084301168652e-05,
      "loss": 0.3282,
      "step": 1610
    },
    {
      "epoch": 1.4198071866783524,
      "grad_norm": 1.7453776597976685,
      "learning_rate": 2.7577074365994747e-05,
      "loss": 0.3217,
      "step": 1620
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 2.02482271194458,
      "learning_rate": 2.70885029916531e-05,
      "loss": 0.3355,
      "step": 1630
    },
    {
      "epoch": 1.4373356704645048,
      "grad_norm": 1.927150845527649,
      "learning_rate": 2.660522148410675e-05,
      "loss": 0.3528,
      "step": 1640
    },
    {
      "epoch": 1.446099912357581,
      "grad_norm": 1.5125665664672852,
      "learning_rate": 2.6127321436250117e-05,
      "loss": 0.3355,
      "step": 1650
    },
    {
      "epoch": 1.4548641542506573,
      "grad_norm": 1.771646499633789,
      "learning_rate": 2.565489342106805e-05,
      "loss": 0.3375,
      "step": 1660
    },
    {
      "epoch": 1.4636283961437335,
      "grad_norm": 2.540931463241577,
      "learning_rate": 2.518802697447003e-05,
      "loss": 0.3601,
      "step": 1670
    },
    {
      "epoch": 1.4723926380368098,
      "grad_norm": 1.5853700637817383,
      "learning_rate": 2.472681057832121e-05,
      "loss": 0.3207,
      "step": 1680
    },
    {
      "epoch": 1.481156879929886,
      "grad_norm": 2.11691951751709,
      "learning_rate": 2.427133164367296e-05,
      "loss": 0.3357,
      "step": 1690
    },
    {
      "epoch": 1.4899211218229622,
      "grad_norm": 1.55061674118042,
      "learning_rate": 2.3821676494196572e-05,
      "loss": 0.3314,
      "step": 1700
    },
    {
      "epoch": 1.4986853637160387,
      "grad_norm": 1.5113292932510376,
      "learning_rate": 2.3377930349822856e-05,
      "loss": 0.3162,
      "step": 1710
    },
    {
      "epoch": 1.5074496056091147,
      "grad_norm": 2.0338950157165527,
      "learning_rate": 2.2940177310591113e-05,
      "loss": 0.3693,
      "step": 1720
    },
    {
      "epoch": 1.5162138475021911,
      "grad_norm": 2.0127060413360596,
      "learning_rate": 2.250850034071016e-05,
      "loss": 0.324,
      "step": 1730
    },
    {
      "epoch": 1.5249780893952674,
      "grad_norm": 1.9195280075073242,
      "learning_rate": 2.20829812528348e-05,
      "loss": 0.3559,
      "step": 1740
    },
    {
      "epoch": 1.5337423312883436,
      "grad_norm": 1.6400375366210938,
      "learning_rate": 2.1663700692560373e-05,
      "loss": 0.3371,
      "step": 1750
    },
    {
      "epoch": 1.5425065731814198,
      "grad_norm": 1.9201463460922241,
      "learning_rate": 2.1250738123138665e-05,
      "loss": 0.3536,
      "step": 1760
    },
    {
      "epoch": 1.551270815074496,
      "grad_norm": 1.8780487775802612,
      "learning_rate": 2.084417181041769e-05,
      "loss": 0.3829,
      "step": 1770
    },
    {
      "epoch": 1.5600350569675723,
      "grad_norm": 1.8463397026062012,
      "learning_rate": 2.0444078808008655e-05,
      "loss": 0.3431,
      "step": 1780
    },
    {
      "epoch": 1.5687992988606485,
      "grad_norm": 1.8080838918685913,
      "learning_rate": 2.005053494268241e-05,
      "loss": 0.3748,
      "step": 1790
    },
    {
      "epoch": 1.577563540753725,
      "grad_norm": 1.9387633800506592,
      "learning_rate": 1.9663614799998635e-05,
      "loss": 0.3586,
      "step": 1800
    },
    {
      "epoch": 1.586327782646801,
      "grad_norm": 1.7084999084472656,
      "learning_rate": 1.928339171017015e-05,
      "loss": 0.357,
      "step": 1810
    },
    {
      "epoch": 1.5950920245398774,
      "grad_norm": 2.1515519618988037,
      "learning_rate": 1.8909937734165107e-05,
      "loss": 0.3333,
      "step": 1820
    },
    {
      "epoch": 1.6038562664329534,
      "grad_norm": 1.4635021686553955,
      "learning_rate": 1.8543323650049864e-05,
      "loss": 0.3337,
      "step": 1830
    },
    {
      "epoch": 1.6126205083260299,
      "grad_norm": 1.7444740533828735,
      "learning_rate": 1.8183618939574904e-05,
      "loss": 0.3686,
      "step": 1840
    },
    {
      "epoch": 1.6213847502191059,
      "grad_norm": 1.587889552116394,
      "learning_rate": 1.7830891775006396e-05,
      "loss": 0.3167,
      "step": 1850
    },
    {
      "epoch": 1.6301489921121823,
      "grad_norm": 1.9329102039337158,
      "learning_rate": 1.748520900620609e-05,
      "loss": 0.3349,
      "step": 1860
    },
    {
      "epoch": 1.6389132340052586,
      "grad_norm": 1.610293984413147,
      "learning_rate": 1.714663614796167e-05,
      "loss": 0.3841,
      "step": 1870
    },
    {
      "epoch": 1.6476774758983348,
      "grad_norm": 1.9500341415405273,
      "learning_rate": 1.6815237367570197e-05,
      "loss": 0.3575,
      "step": 1880
    },
    {
      "epoch": 1.656441717791411,
      "grad_norm": 1.717809796333313,
      "learning_rate": 1.6491075472677016e-05,
      "loss": 0.2917,
      "step": 1890
    },
    {
      "epoch": 1.6652059596844873,
      "grad_norm": 1.2370789051055908,
      "learning_rate": 1.6174211899372175e-05,
      "loss": 0.3535,
      "step": 1900
    },
    {
      "epoch": 1.6739702015775635,
      "grad_norm": 1.6636922359466553,
      "learning_rate": 1.5864706700546955e-05,
      "loss": 0.3242,
      "step": 1910
    },
    {
      "epoch": 1.6827344434706397,
      "grad_norm": 1.3746954202651978,
      "learning_rate": 1.5562618534512428e-05,
      "loss": 0.3463,
      "step": 1920
    },
    {
      "epoch": 1.6914986853637162,
      "grad_norm": 1.9031116962432861,
      "learning_rate": 1.5268004653882406e-05,
      "loss": 0.3448,
      "step": 1930
    },
    {
      "epoch": 1.7002629272567922,
      "grad_norm": 1.8231595754623413,
      "learning_rate": 1.4980920894722692e-05,
      "loss": 0.3327,
      "step": 1940
    },
    {
      "epoch": 1.7090271691498686,
      "grad_norm": 2.2330541610717773,
      "learning_rate": 1.4701421665969001e-05,
      "loss": 0.344,
      "step": 1950
    },
    {
      "epoch": 1.7177914110429446,
      "grad_norm": 1.5624206066131592,
      "learning_rate": 1.442955993911505e-05,
      "loss": 0.3194,
      "step": 1960
    },
    {
      "epoch": 1.726555652936021,
      "grad_norm": 1.7255851030349731,
      "learning_rate": 1.4165387238173399e-05,
      "loss": 0.3122,
      "step": 1970
    },
    {
      "epoch": 1.7353198948290973,
      "grad_norm": 1.7800745964050293,
      "learning_rate": 1.3908953629910376e-05,
      "loss": 0.3005,
      "step": 1980
    },
    {
      "epoch": 1.7440841367221736,
      "grad_norm": 2.472658395767212,
      "learning_rate": 1.3660307714357338e-05,
      "loss": 0.3571,
      "step": 1990
    },
    {
      "epoch": 1.7528483786152498,
      "grad_norm": 1.6620064973831177,
      "learning_rate": 1.3419496615599805e-05,
      "loss": 0.3522,
      "step": 2000
    },
    {
      "epoch": 1.761612620508326,
      "grad_norm": 1.5490673780441284,
      "learning_rate": 1.318656597284643e-05,
      "loss": 0.2967,
      "step": 2010
    },
    {
      "epoch": 1.7703768624014022,
      "grad_norm": 2.1536474227905273,
      "learning_rate": 1.2961559931779257e-05,
      "loss": 0.3703,
      "step": 2020
    },
    {
      "epoch": 1.7791411042944785,
      "grad_norm": 1.490907073020935,
      "learning_rate": 1.274452113618716e-05,
      "loss": 0.3076,
      "step": 2030
    },
    {
      "epoch": 1.787905346187555,
      "grad_norm": 1.4357167482376099,
      "learning_rate": 1.2535490719883835e-05,
      "loss": 0.3265,
      "step": 2040
    },
    {
      "epoch": 1.796669588080631,
      "grad_norm": 1.5666704177856445,
      "learning_rate": 1.233450829891203e-05,
      "loss": 0.324,
      "step": 2050
    },
    {
      "epoch": 1.8054338299737074,
      "grad_norm": 1.3453813791275024,
      "learning_rate": 1.2141611964035366e-05,
      "loss": 0.2977,
      "step": 2060
    },
    {
      "epoch": 1.8141980718667834,
      "grad_norm": 1.782593011856079,
      "learning_rate": 1.195683827351931e-05,
      "loss": 0.2795,
      "step": 2070
    },
    {
      "epoch": 1.8229623137598598,
      "grad_norm": 1.9441533088684082,
      "learning_rate": 1.1780222246202494e-05,
      "loss": 0.3166,
      "step": 2080
    },
    {
      "epoch": 1.831726555652936,
      "grad_norm": 1.778911828994751,
      "learning_rate": 1.1611797354859892e-05,
      "loss": 0.3493,
      "step": 2090
    },
    {
      "epoch": 1.8404907975460123,
      "grad_norm": 1.7141963243484497,
      "learning_rate": 1.145159551985894e-05,
      "loss": 0.3313,
      "step": 2100
    },
    {
      "epoch": 1.8492550394390885,
      "grad_norm": 3.3165926933288574,
      "learning_rate": 1.1299647103109908e-05,
      "loss": 0.356,
      "step": 2110
    },
    {
      "epoch": 1.8580192813321648,
      "grad_norm": 1.787851095199585,
      "learning_rate": 1.11559809023116e-05,
      "loss": 0.3219,
      "step": 2120
    },
    {
      "epoch": 1.866783523225241,
      "grad_norm": 1.7090057134628296,
      "learning_rate": 1.1020624145493572e-05,
      "loss": 0.3445,
      "step": 2130
    },
    {
      "epoch": 1.8755477651183172,
      "grad_norm": 1.5748744010925293,
      "learning_rate": 1.0893602485855766e-05,
      "loss": 0.3253,
      "step": 2140
    },
    {
      "epoch": 1.8843120070113937,
      "grad_norm": 1.9979685544967651,
      "learning_rate": 1.0774939996906644e-05,
      "loss": 0.3177,
      "step": 2150
    },
    {
      "epoch": 1.8930762489044697,
      "grad_norm": 1.936296820640564,
      "learning_rate": 1.0664659167900723e-05,
      "loss": 0.3304,
      "step": 2160
    },
    {
      "epoch": 1.9018404907975461,
      "grad_norm": 1.8121618032455444,
      "learning_rate": 1.0562780899576344e-05,
      "loss": 0.3493,
      "step": 2170
    },
    {
      "epoch": 1.9106047326906221,
      "grad_norm": 1.6870180368423462,
      "learning_rate": 1.046932450019448e-05,
      "loss": 0.328,
      "step": 2180
    },
    {
      "epoch": 1.9193689745836986,
      "grad_norm": 2.0619869232177734,
      "learning_rate": 1.0384307681879428e-05,
      "loss": 0.3845,
      "step": 2190
    },
    {
      "epoch": 1.9281332164767746,
      "grad_norm": 2.1429038047790527,
      "learning_rate": 1.030774655726191e-05,
      "loss": 0.3143,
      "step": 2200
    },
    {
      "epoch": 1.936897458369851,
      "grad_norm": 1.9444646835327148,
      "learning_rate": 1.0239655636425374e-05,
      "loss": 0.3135,
      "step": 2210
    },
    {
      "epoch": 1.9456617002629273,
      "grad_norm": 1.6065791845321655,
      "learning_rate": 1.0180047824156011e-05,
      "loss": 0.3142,
      "step": 2220
    },
    {
      "epoch": 1.9544259421560035,
      "grad_norm": 2.215041160583496,
      "learning_rate": 1.0128934417497004e-05,
      "loss": 0.3234,
      "step": 2230
    },
    {
      "epoch": 1.9631901840490797,
      "grad_norm": 1.766499638557434,
      "learning_rate": 1.008632510360747e-05,
      "loss": 0.3395,
      "step": 2240
    },
    {
      "epoch": 1.971954425942156,
      "grad_norm": 2.355278491973877,
      "learning_rate": 1.0052227957926518e-05,
      "loss": 0.3476,
      "step": 2250
    },
    {
      "epoch": 1.9807186678352322,
      "grad_norm": 1.6923573017120361,
      "learning_rate": 1.0026649442642785e-05,
      "loss": 0.386,
      "step": 2260
    },
    {
      "epoch": 1.9894829097283084,
      "grad_norm": 1.454087495803833,
      "learning_rate": 1.0009594405469695e-05,
      "loss": 0.3059,
      "step": 2270
    },
    {
      "epoch": 1.9982471516213849,
      "grad_norm": 1.5868600606918335,
      "learning_rate": 1.0001066078726703e-05,
      "loss": 0.3474,
      "step": 2280
    }
  ],
  "logging_steps": 10,
  "max_steps": 2282,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7769146165323366e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}