| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 148.87126296522268, | |
| "eval_steps": 500, | |
| "global_step": 30500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09762050030506407, | |
| "grad_norm": 0.8231198191642761, | |
| "learning_rate": 4e-05, | |
| "loss": 2.624, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.19524100061012814, | |
| "grad_norm": 1.040634274482727, | |
| "learning_rate": 8e-05, | |
| "loss": 2.5599, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2928615009151922, | |
| "grad_norm": 1.0133676528930664, | |
| "learning_rate": 0.00012, | |
| "loss": 2.1985, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3904820012202563, | |
| "grad_norm": 1.0250593423843384, | |
| "learning_rate": 0.00016, | |
| "loss": 1.9481, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4881025015253203, | |
| "grad_norm": 0.9968363046646118, | |
| "learning_rate": 0.0002, | |
| "loss": 1.7738, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5857230018303844, | |
| "grad_norm": 1.4279309511184692, | |
| "learning_rate": 0.0001998688524590164, | |
| "loss": 1.7329, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6833435021354485, | |
| "grad_norm": 1.3037158250808716, | |
| "learning_rate": 0.0001997377049180328, | |
| "loss": 1.6784, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7809640024405126, | |
| "grad_norm": 1.404618263244629, | |
| "learning_rate": 0.00019960655737704918, | |
| "loss": 1.6264, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8785845027455765, | |
| "grad_norm": 1.058852195739746, | |
| "learning_rate": 0.0001994754098360656, | |
| "loss": 1.6077, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9762050030506406, | |
| "grad_norm": 1.8023818731307983, | |
| "learning_rate": 0.00019934426229508198, | |
| "loss": 1.5252, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0738255033557047, | |
| "grad_norm": 1.2388545274734497, | |
| "learning_rate": 0.00019921311475409837, | |
| "loss": 1.5621, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.1714460036607688, | |
| "grad_norm": 1.5438932180404663, | |
| "learning_rate": 0.00019908196721311476, | |
| "loss": 1.4877, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.2690665039658329, | |
| "grad_norm": 1.590529203414917, | |
| "learning_rate": 0.00019895081967213115, | |
| "loss": 1.5568, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.366687004270897, | |
| "grad_norm": 1.2864124774932861, | |
| "learning_rate": 0.00019881967213114757, | |
| "loss": 1.5082, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.4643075045759608, | |
| "grad_norm": 1.4031420946121216, | |
| "learning_rate": 0.00019868852459016393, | |
| "loss": 1.467, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.561928004881025, | |
| "grad_norm": 1.5916552543640137, | |
| "learning_rate": 0.00019855737704918035, | |
| "loss": 1.4055, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.659548505186089, | |
| "grad_norm": 1.5989781618118286, | |
| "learning_rate": 0.00019842622950819674, | |
| "loss": 1.358, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.757169005491153, | |
| "grad_norm": 1.7474373579025269, | |
| "learning_rate": 0.00019829508196721313, | |
| "loss": 1.3905, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.8547895057962172, | |
| "grad_norm": 1.8275643587112427, | |
| "learning_rate": 0.00019816393442622951, | |
| "loss": 1.4449, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.9524100061012812, | |
| "grad_norm": 1.7919280529022217, | |
| "learning_rate": 0.0001980327868852459, | |
| "loss": 1.4057, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0500305064063453, | |
| "grad_norm": 2.0150179862976074, | |
| "learning_rate": 0.00019790163934426232, | |
| "loss": 1.3659, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.1476510067114094, | |
| "grad_norm": 2.0520079135894775, | |
| "learning_rate": 0.00019777049180327868, | |
| "loss": 1.3735, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.2452715070164735, | |
| "grad_norm": 2.1597766876220703, | |
| "learning_rate": 0.0001976393442622951, | |
| "loss": 1.318, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.3428920073215376, | |
| "grad_norm": 1.8374860286712646, | |
| "learning_rate": 0.0001975081967213115, | |
| "loss": 1.3469, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.4405125076266017, | |
| "grad_norm": 2.2065134048461914, | |
| "learning_rate": 0.00019737704918032788, | |
| "loss": 1.3822, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.5381330079316657, | |
| "grad_norm": 2.5370113849639893, | |
| "learning_rate": 0.00019724590163934427, | |
| "loss": 1.2652, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.63575350823673, | |
| "grad_norm": 2.0484936237335205, | |
| "learning_rate": 0.00019711475409836066, | |
| "loss": 1.2971, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.733374008541794, | |
| "grad_norm": 1.699357509613037, | |
| "learning_rate": 0.00019698360655737707, | |
| "loss": 1.3195, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.830994508846858, | |
| "grad_norm": 2.4636785984039307, | |
| "learning_rate": 0.00019685245901639344, | |
| "loss": 1.2474, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.9286150091519216, | |
| "grad_norm": 3.6711232662200928, | |
| "learning_rate": 0.00019672131147540985, | |
| "loss": 1.3091, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.026235509456986, | |
| "grad_norm": 1.9547876119613647, | |
| "learning_rate": 0.00019659016393442624, | |
| "loss": 1.2966, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.1238560097620502, | |
| "grad_norm": 2.135209560394287, | |
| "learning_rate": 0.00019645901639344263, | |
| "loss": 1.2201, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.221476510067114, | |
| "grad_norm": 2.2378487586975098, | |
| "learning_rate": 0.00019632786885245902, | |
| "loss": 1.2226, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.319097010372178, | |
| "grad_norm": 2.5407207012176514, | |
| "learning_rate": 0.0001961967213114754, | |
| "loss": 1.224, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.416717510677242, | |
| "grad_norm": 2.5137102603912354, | |
| "learning_rate": 0.00019606557377049183, | |
| "loss": 1.271, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.514338010982306, | |
| "grad_norm": 2.5121419429779053, | |
| "learning_rate": 0.00019593442622950822, | |
| "loss": 1.2469, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.61195851128737, | |
| "grad_norm": 2.5250003337860107, | |
| "learning_rate": 0.0001958032786885246, | |
| "loss": 1.1799, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.7095790115924343, | |
| "grad_norm": 1.8390239477157593, | |
| "learning_rate": 0.000195672131147541, | |
| "loss": 1.2629, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.8071995118974984, | |
| "grad_norm": 2.065147638320923, | |
| "learning_rate": 0.00019554098360655738, | |
| "loss": 1.2158, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.9048200122025625, | |
| "grad_norm": 2.7138381004333496, | |
| "learning_rate": 0.00019540983606557377, | |
| "loss": 1.1778, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.002440512507627, | |
| "grad_norm": 3.124082565307617, | |
| "learning_rate": 0.00019527868852459016, | |
| "loss": 1.2258, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.100061012812691, | |
| "grad_norm": 2.8344528675079346, | |
| "learning_rate": 0.00019514754098360658, | |
| "loss": 1.1718, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.197681513117755, | |
| "grad_norm": 2.2852656841278076, | |
| "learning_rate": 0.00019501639344262297, | |
| "loss": 1.132, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.295302013422819, | |
| "grad_norm": 2.662973642349243, | |
| "learning_rate": 0.00019488524590163936, | |
| "loss": 1.158, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.392922513727883, | |
| "grad_norm": 2.1522879600524902, | |
| "learning_rate": 0.00019475409836065575, | |
| "loss": 1.1263, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.490543014032947, | |
| "grad_norm": 2.541273832321167, | |
| "learning_rate": 0.00019462295081967214, | |
| "loss": 1.1807, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.588163514338011, | |
| "grad_norm": 2.0520172119140625, | |
| "learning_rate": 0.00019449180327868855, | |
| "loss": 1.1164, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.685784014643075, | |
| "grad_norm": 2.2306621074676514, | |
| "learning_rate": 0.00019436065573770491, | |
| "loss": 1.117, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.783404514948139, | |
| "grad_norm": 3.0042741298675537, | |
| "learning_rate": 0.00019422950819672133, | |
| "loss": 1.2039, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.881025015253203, | |
| "grad_norm": 2.1427409648895264, | |
| "learning_rate": 0.00019409836065573772, | |
| "loss": 1.1225, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.978645515558267, | |
| "grad_norm": 3.128009557723999, | |
| "learning_rate": 0.0001939672131147541, | |
| "loss": 1.1619, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 5.0762660158633315, | |
| "grad_norm": 3.7390689849853516, | |
| "learning_rate": 0.0001938360655737705, | |
| "loss": 1.1612, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 5.173886516168396, | |
| "grad_norm": 2.554919958114624, | |
| "learning_rate": 0.0001937049180327869, | |
| "loss": 1.1003, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 5.27150701647346, | |
| "grad_norm": 3.6076526641845703, | |
| "learning_rate": 0.0001935737704918033, | |
| "loss": 1.105, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 5.369127516778524, | |
| "grad_norm": 3.665255069732666, | |
| "learning_rate": 0.00019344262295081967, | |
| "loss": 1.039, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 5.466748017083588, | |
| "grad_norm": 2.7409827709198, | |
| "learning_rate": 0.00019331147540983608, | |
| "loss": 1.0942, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 5.564368517388652, | |
| "grad_norm": 2.087810754776001, | |
| "learning_rate": 0.00019318032786885247, | |
| "loss": 1.0797, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 5.661989017693716, | |
| "grad_norm": 2.9384937286376953, | |
| "learning_rate": 0.00019304918032786886, | |
| "loss": 1.1056, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 5.75960951799878, | |
| "grad_norm": 2.7793357372283936, | |
| "learning_rate": 0.00019291803278688525, | |
| "loss": 1.1048, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 5.857230018303844, | |
| "grad_norm": 2.1878364086151123, | |
| "learning_rate": 0.00019278688524590164, | |
| "loss": 1.1033, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.954850518608908, | |
| "grad_norm": 2.3006527423858643, | |
| "learning_rate": 0.00019265573770491806, | |
| "loss": 1.0708, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 6.052471018913972, | |
| "grad_norm": 2.899059295654297, | |
| "learning_rate": 0.00019252459016393442, | |
| "loss": 1.0686, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 6.150091519219036, | |
| "grad_norm": 2.8099234104156494, | |
| "learning_rate": 0.00019239344262295084, | |
| "loss": 0.9753, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 6.2477120195241005, | |
| "grad_norm": 3.555896282196045, | |
| "learning_rate": 0.00019226229508196723, | |
| "loss": 1.0251, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 6.345332519829164, | |
| "grad_norm": 2.860172748565674, | |
| "learning_rate": 0.00019213114754098362, | |
| "loss": 1.0532, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 6.442953020134228, | |
| "grad_norm": 3.038743734359741, | |
| "learning_rate": 0.000192, | |
| "loss": 1.0339, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 6.540573520439292, | |
| "grad_norm": 3.338714838027954, | |
| "learning_rate": 0.0001918688524590164, | |
| "loss": 1.0632, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 6.638194020744356, | |
| "grad_norm": 2.932199478149414, | |
| "learning_rate": 0.0001917377049180328, | |
| "loss": 1.1157, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 6.73581452104942, | |
| "grad_norm": 3.783275604248047, | |
| "learning_rate": 0.00019160655737704917, | |
| "loss": 1.0041, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 6.833435021354484, | |
| "grad_norm": 3.5673303604125977, | |
| "learning_rate": 0.0001914754098360656, | |
| "loss": 1.013, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 6.931055521659548, | |
| "grad_norm": 2.917269468307495, | |
| "learning_rate": 0.00019134426229508198, | |
| "loss": 1.1148, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 7.028676021964612, | |
| "grad_norm": 3.269557237625122, | |
| "learning_rate": 0.00019121311475409837, | |
| "loss": 0.9701, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 7.126296522269676, | |
| "grad_norm": 3.337118625640869, | |
| "learning_rate": 0.00019108196721311476, | |
| "loss": 0.9921, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 7.22391702257474, | |
| "grad_norm": 3.016709089279175, | |
| "learning_rate": 0.00019095081967213115, | |
| "loss": 0.9514, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 7.3215375228798045, | |
| "grad_norm": 2.8816025257110596, | |
| "learning_rate": 0.00019081967213114756, | |
| "loss": 0.9545, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 7.419158023184869, | |
| "grad_norm": 2.9574875831604004, | |
| "learning_rate": 0.00019068852459016395, | |
| "loss": 1.0192, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 7.516778523489933, | |
| "grad_norm": 2.7323741912841797, | |
| "learning_rate": 0.00019055737704918034, | |
| "loss": 0.9998, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 7.614399023794997, | |
| "grad_norm": 3.1870975494384766, | |
| "learning_rate": 0.00019042622950819673, | |
| "loss": 0.983, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 7.712019524100061, | |
| "grad_norm": 2.5255308151245117, | |
| "learning_rate": 0.00019029508196721312, | |
| "loss": 1.0236, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 7.809640024405125, | |
| "grad_norm": 3.190338134765625, | |
| "learning_rate": 0.0001901639344262295, | |
| "loss": 1.0263, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 7.907260524710189, | |
| "grad_norm": 2.7343392372131348, | |
| "learning_rate": 0.0001900327868852459, | |
| "loss": 0.981, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 8.004881025015253, | |
| "grad_norm": 2.4128050804138184, | |
| "learning_rate": 0.00018990163934426232, | |
| "loss": 1.0471, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 8.102501525320317, | |
| "grad_norm": 3.3373966217041016, | |
| "learning_rate": 0.0001897704918032787, | |
| "loss": 0.9562, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 8.200122025625381, | |
| "grad_norm": 4.268326282501221, | |
| "learning_rate": 0.0001896393442622951, | |
| "loss": 0.9365, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 8.297742525930445, | |
| "grad_norm": 4.0396728515625, | |
| "learning_rate": 0.00018950819672131148, | |
| "loss": 0.9469, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 8.39536302623551, | |
| "grad_norm": 2.5835509300231934, | |
| "learning_rate": 0.00018937704918032787, | |
| "loss": 0.8998, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 8.492983526540574, | |
| "grad_norm": 4.017479419708252, | |
| "learning_rate": 0.0001892459016393443, | |
| "loss": 0.9843, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 8.590604026845638, | |
| "grad_norm": 2.7477169036865234, | |
| "learning_rate": 0.00018911475409836065, | |
| "loss": 0.9164, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 8.688224527150702, | |
| "grad_norm": 3.388787031173706, | |
| "learning_rate": 0.00018898360655737707, | |
| "loss": 0.9626, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 8.785845027455766, | |
| "grad_norm": 2.938905954360962, | |
| "learning_rate": 0.00018885245901639346, | |
| "loss": 0.9662, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 8.88346552776083, | |
| "grad_norm": 2.854912519454956, | |
| "learning_rate": 0.00018872131147540985, | |
| "loss": 1.0023, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 8.981086028065894, | |
| "grad_norm": 2.120589017868042, | |
| "learning_rate": 0.00018859016393442624, | |
| "loss": 0.9616, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 9.078706528370958, | |
| "grad_norm": 2.8356106281280518, | |
| "learning_rate": 0.00018845901639344263, | |
| "loss": 0.9589, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 9.176327028676022, | |
| "grad_norm": 2.9466371536254883, | |
| "learning_rate": 0.00018832786885245904, | |
| "loss": 0.9196, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 9.273947528981086, | |
| "grad_norm": 3.5278165340423584, | |
| "learning_rate": 0.0001881967213114754, | |
| "loss": 0.8946, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 9.37156802928615, | |
| "grad_norm": 3.5878446102142334, | |
| "learning_rate": 0.00018806557377049182, | |
| "loss": 0.8855, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 9.469188529591214, | |
| "grad_norm": 2.8943283557891846, | |
| "learning_rate": 0.0001879344262295082, | |
| "loss": 0.9094, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 9.566809029896278, | |
| "grad_norm": 2.825993061065674, | |
| "learning_rate": 0.0001878032786885246, | |
| "loss": 0.9591, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 9.664429530201343, | |
| "grad_norm": 3.7361643314361572, | |
| "learning_rate": 0.000187672131147541, | |
| "loss": 0.9368, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 9.762050030506407, | |
| "grad_norm": 3.8567728996276855, | |
| "learning_rate": 0.00018754098360655738, | |
| "loss": 0.8843, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 9.85967053081147, | |
| "grad_norm": 3.694343090057373, | |
| "learning_rate": 0.0001874098360655738, | |
| "loss": 0.9235, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 9.957291031116535, | |
| "grad_norm": 4.091517448425293, | |
| "learning_rate": 0.00018727868852459016, | |
| "loss": 0.9418, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 10.054911531421599, | |
| "grad_norm": 3.1028010845184326, | |
| "learning_rate": 0.00018714754098360657, | |
| "loss": 0.8961, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 10.152532031726663, | |
| "grad_norm": 2.9279282093048096, | |
| "learning_rate": 0.00018701639344262296, | |
| "loss": 0.8457, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 10.250152532031727, | |
| "grad_norm": 2.750516176223755, | |
| "learning_rate": 0.00018688524590163935, | |
| "loss": 0.8941, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 10.347773032336791, | |
| "grad_norm": 3.211580276489258, | |
| "learning_rate": 0.00018675409836065574, | |
| "loss": 0.8799, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 10.445393532641855, | |
| "grad_norm": 3.7746126651763916, | |
| "learning_rate": 0.00018662295081967213, | |
| "loss": 0.8288, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 10.54301403294692, | |
| "grad_norm": 2.8005576133728027, | |
| "learning_rate": 0.00018649180327868855, | |
| "loss": 0.9162, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 10.640634533251983, | |
| "grad_norm": 2.9262828826904297, | |
| "learning_rate": 0.00018636065573770494, | |
| "loss": 0.83, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 10.738255033557047, | |
| "grad_norm": 4.560007095336914, | |
| "learning_rate": 0.00018622950819672133, | |
| "loss": 0.956, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 10.835875533862112, | |
| "grad_norm": 2.39521861076355, | |
| "learning_rate": 0.00018609836065573772, | |
| "loss": 0.9038, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 10.933496034167176, | |
| "grad_norm": 2.711963653564453, | |
| "learning_rate": 0.0001859672131147541, | |
| "loss": 0.9322, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 11.03111653447224, | |
| "grad_norm": 2.9960291385650635, | |
| "learning_rate": 0.0001858360655737705, | |
| "loss": 0.8956, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 11.128737034777304, | |
| "grad_norm": 3.7117059230804443, | |
| "learning_rate": 0.00018570491803278688, | |
| "loss": 0.8253, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 11.226357535082368, | |
| "grad_norm": 2.5004775524139404, | |
| "learning_rate": 0.0001855737704918033, | |
| "loss": 0.8075, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 11.323978035387432, | |
| "grad_norm": 2.690229654312134, | |
| "learning_rate": 0.0001854426229508197, | |
| "loss": 0.8528, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 11.421598535692496, | |
| "grad_norm": 2.5582730770111084, | |
| "learning_rate": 0.00018531147540983608, | |
| "loss": 0.9235, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 11.51921903599756, | |
| "grad_norm": 3.388638973236084, | |
| "learning_rate": 0.00018518032786885247, | |
| "loss": 0.8562, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 11.616839536302624, | |
| "grad_norm": 2.6668691635131836, | |
| "learning_rate": 0.00018504918032786886, | |
| "loss": 0.8447, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 11.714460036607688, | |
| "grad_norm": 2.5166635513305664, | |
| "learning_rate": 0.00018491803278688527, | |
| "loss": 0.8748, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 11.812080536912752, | |
| "grad_norm": 2.641568183898926, | |
| "learning_rate": 0.00018478688524590164, | |
| "loss": 0.8401, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 11.909701037217816, | |
| "grad_norm": 4.179554462432861, | |
| "learning_rate": 0.00018465573770491805, | |
| "loss": 0.8231, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 12.00732153752288, | |
| "grad_norm": 3.014526844024658, | |
| "learning_rate": 0.00018452459016393444, | |
| "loss": 0.8792, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 12.104942037827945, | |
| "grad_norm": 4.275758743286133, | |
| "learning_rate": 0.0001843934426229508, | |
| "loss": 0.8161, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 12.202562538133009, | |
| "grad_norm": 2.949131965637207, | |
| "learning_rate": 0.00018426229508196722, | |
| "loss": 0.8304, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 12.300183038438073, | |
| "grad_norm": 3.018010377883911, | |
| "learning_rate": 0.0001841311475409836, | |
| "loss": 0.8373, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 12.397803538743137, | |
| "grad_norm": 4.015832424163818, | |
| "learning_rate": 0.00018400000000000003, | |
| "loss": 0.779, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 12.495424039048201, | |
| "grad_norm": 3.1773481369018555, | |
| "learning_rate": 0.0001838688524590164, | |
| "loss": 0.8428, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 12.593044539353265, | |
| "grad_norm": 3.50673508644104, | |
| "learning_rate": 0.0001837377049180328, | |
| "loss": 0.8148, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 12.690665039658327, | |
| "grad_norm": 2.848912239074707, | |
| "learning_rate": 0.0001836065573770492, | |
| "loss": 0.8427, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 12.788285539963393, | |
| "grad_norm": 3.488842725753784, | |
| "learning_rate": 0.00018347540983606558, | |
| "loss": 0.8374, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 12.885906040268456, | |
| "grad_norm": 3.831639528274536, | |
| "learning_rate": 0.00018334426229508197, | |
| "loss": 0.7835, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 12.98352654057352, | |
| "grad_norm": 3.8596065044403076, | |
| "learning_rate": 0.00018321311475409836, | |
| "loss": 0.8719, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 13.081147040878584, | |
| "grad_norm": 4.338437557220459, | |
| "learning_rate": 0.00018308196721311478, | |
| "loss": 0.7768, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 13.178767541183648, | |
| "grad_norm": 4.147676944732666, | |
| "learning_rate": 0.00018295081967213114, | |
| "loss": 0.7686, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 13.276388041488712, | |
| "grad_norm": 3.430076837539673, | |
| "learning_rate": 0.00018281967213114756, | |
| "loss": 0.7069, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 13.374008541793776, | |
| "grad_norm": 3.6902527809143066, | |
| "learning_rate": 0.00018268852459016395, | |
| "loss": 0.8241, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 13.47162904209884, | |
| "grad_norm": 3.362191915512085, | |
| "learning_rate": 0.00018255737704918034, | |
| "loss": 0.8096, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 13.569249542403904, | |
| "grad_norm": 3.1833972930908203, | |
| "learning_rate": 0.00018242622950819673, | |
| "loss": 0.8258, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 13.666870042708968, | |
| "grad_norm": 3.001095771789551, | |
| "learning_rate": 0.00018229508196721312, | |
| "loss": 0.7793, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 13.764490543014032, | |
| "grad_norm": 4.468006134033203, | |
| "learning_rate": 0.00018216393442622953, | |
| "loss": 0.7837, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 13.862111043319096, | |
| "grad_norm": 3.7999494075775146, | |
| "learning_rate": 0.00018203278688524592, | |
| "loss": 0.8158, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 13.95973154362416, | |
| "grad_norm": 2.7308099269866943, | |
| "learning_rate": 0.0001819016393442623, | |
| "loss": 0.8657, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 14.057352043929225, | |
| "grad_norm": 3.631223678588867, | |
| "learning_rate": 0.0001817704918032787, | |
| "loss": 0.8219, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 14.154972544234289, | |
| "grad_norm": 3.7246315479278564, | |
| "learning_rate": 0.0001816393442622951, | |
| "loss": 0.738, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 14.252593044539353, | |
| "grad_norm": 3.8941688537597656, | |
| "learning_rate": 0.00018150819672131148, | |
| "loss": 0.7882, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 14.350213544844417, | |
| "grad_norm": 4.167781352996826, | |
| "learning_rate": 0.00018137704918032787, | |
| "loss": 0.7396, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 14.44783404514948, | |
| "grad_norm": 3.553039312362671, | |
| "learning_rate": 0.00018124590163934429, | |
| "loss": 0.7614, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 14.545454545454545, | |
| "grad_norm": 3.1469032764434814, | |
| "learning_rate": 0.00018111475409836067, | |
| "loss": 0.7651, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 14.643075045759609, | |
| "grad_norm": 5.265510559082031, | |
| "learning_rate": 0.00018098360655737704, | |
| "loss": 0.8169, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 14.740695546064673, | |
| "grad_norm": 2.910022020339966, | |
| "learning_rate": 0.00018085245901639345, | |
| "loss": 0.7746, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 14.838316046369737, | |
| "grad_norm": 2.8065860271453857, | |
| "learning_rate": 0.00018072131147540984, | |
| "loss": 0.8028, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 14.935936546674801, | |
| "grad_norm": 2.44608736038208, | |
| "learning_rate": 0.00018059016393442626, | |
| "loss": 0.7915, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 15.033557046979865, | |
| "grad_norm": 2.230102300643921, | |
| "learning_rate": 0.00018045901639344262, | |
| "loss": 0.7283, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 15.13117754728493, | |
| "grad_norm": 3.1589298248291016, | |
| "learning_rate": 0.00018032786885245904, | |
| "loss": 0.7117, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 15.228798047589994, | |
| "grad_norm": 3.1552984714508057, | |
| "learning_rate": 0.00018019672131147543, | |
| "loss": 0.7047, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 15.326418547895058, | |
| "grad_norm": 3.4300222396850586, | |
| "learning_rate": 0.0001800655737704918, | |
| "loss": 0.7328, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 15.424039048200122, | |
| "grad_norm": 2.5847089290618896, | |
| "learning_rate": 0.0001799344262295082, | |
| "loss": 0.7774, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 15.521659548505186, | |
| "grad_norm": 2.6597721576690674, | |
| "learning_rate": 0.0001798032786885246, | |
| "loss": 0.7427, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 15.61928004881025, | |
| "grad_norm": 3.029202699661255, | |
| "learning_rate": 0.000179672131147541, | |
| "loss": 0.7037, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 15.716900549115314, | |
| "grad_norm": 4.356511116027832, | |
| "learning_rate": 0.00017954098360655737, | |
| "loss": 0.7797, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 15.814521049420378, | |
| "grad_norm": 3.456430435180664, | |
| "learning_rate": 0.0001794098360655738, | |
| "loss": 0.7839, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 15.912141549725442, | |
| "grad_norm": 2.608675718307495, | |
| "learning_rate": 0.00017927868852459018, | |
| "loss": 0.7987, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 16.009762050030506, | |
| "grad_norm": 3.393676519393921, | |
| "learning_rate": 0.00017914754098360657, | |
| "loss": 0.7627, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 16.107382550335572, | |
| "grad_norm": 3.7134811878204346, | |
| "learning_rate": 0.00017901639344262296, | |
| "loss": 0.6441, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 16.205003050640634, | |
| "grad_norm": 4.507261276245117, | |
| "learning_rate": 0.00017888524590163935, | |
| "loss": 0.6972, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 16.3026235509457, | |
| "grad_norm": 3.150489091873169, | |
| "learning_rate": 0.00017875409836065576, | |
| "loss": 0.7079, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 16.400244051250763, | |
| "grad_norm": 3.351140022277832, | |
| "learning_rate": 0.00017862295081967213, | |
| "loss": 0.7319, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 16.49786455155583, | |
| "grad_norm": 3.533992290496826, | |
| "learning_rate": 0.00017849180327868852, | |
| "loss": 0.7095, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 16.59548505186089, | |
| "grad_norm": 3.086233139038086, | |
| "learning_rate": 0.00017836065573770493, | |
| "loss": 0.7653, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 16.693105552165953, | |
| "grad_norm": 2.891784191131592, | |
| "learning_rate": 0.00017822950819672132, | |
| "loss": 0.7096, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 16.79072605247102, | |
| "grad_norm": 3.5396888256073, | |
| "learning_rate": 0.0001780983606557377, | |
| "loss": 0.7385, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 16.888346552776085, | |
| "grad_norm": 3.5215485095977783, | |
| "learning_rate": 0.0001779672131147541, | |
| "loss": 0.791, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 16.985967053081147, | |
| "grad_norm": 3.2734858989715576, | |
| "learning_rate": 0.00017783606557377052, | |
| "loss": 0.7904, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 17.08358755338621, | |
| "grad_norm": 3.1748645305633545, | |
| "learning_rate": 0.0001777049180327869, | |
| "loss": 0.6706, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 17.181208053691275, | |
| "grad_norm": 2.6187920570373535, | |
| "learning_rate": 0.00017757377049180327, | |
| "loss": 0.7038, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 17.278828553996338, | |
| "grad_norm": 2.9449117183685303, | |
| "learning_rate": 0.00017744262295081969, | |
| "loss": 0.7214, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 17.376449054301403, | |
| "grad_norm": 2.324122667312622, | |
| "learning_rate": 0.00017731147540983607, | |
| "loss": 0.7377, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 17.474069554606466, | |
| "grad_norm": 3.7478408813476562, | |
| "learning_rate": 0.00017718032786885246, | |
| "loss": 0.6557, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 17.57169005491153, | |
| "grad_norm": 2.6289916038513184, | |
| "learning_rate": 0.00017704918032786885, | |
| "loss": 0.7385, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 17.669310555216594, | |
| "grad_norm": 3.6009535789489746, | |
| "learning_rate": 0.00017691803278688527, | |
| "loss": 0.7495, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 17.76693105552166, | |
| "grad_norm": 4.0030035972595215, | |
| "learning_rate": 0.00017678688524590166, | |
| "loss": 0.6941, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 17.864551555826722, | |
| "grad_norm": 2.4140219688415527, | |
| "learning_rate": 0.00017665573770491802, | |
| "loss": 0.6882, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 17.962172056131788, | |
| "grad_norm": 3.9716620445251465, | |
| "learning_rate": 0.00017652459016393444, | |
| "loss": 0.7051, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 18.05979255643685, | |
| "grad_norm": 3.0810763835906982, | |
| "learning_rate": 0.00017639344262295083, | |
| "loss": 0.7016, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 18.157413056741916, | |
| "grad_norm": 3.1639134883880615, | |
| "learning_rate": 0.00017626229508196724, | |
| "loss": 0.699, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 18.25503355704698, | |
| "grad_norm": 3.6788012981414795, | |
| "learning_rate": 0.0001761311475409836, | |
| "loss": 0.6673, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 18.352654057352044, | |
| "grad_norm": 3.2544620037078857, | |
| "learning_rate": 0.00017600000000000002, | |
| "loss": 0.7313, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 18.450274557657107, | |
| "grad_norm": 3.900455951690674, | |
| "learning_rate": 0.0001758688524590164, | |
| "loss": 0.7271, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 18.547895057962172, | |
| "grad_norm": 3.3089077472686768, | |
| "learning_rate": 0.00017573770491803277, | |
| "loss": 0.6452, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 18.645515558267235, | |
| "grad_norm": 3.921875, | |
| "learning_rate": 0.0001756065573770492, | |
| "loss": 0.6498, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 18.7431360585723, | |
| "grad_norm": 3.5202882289886475, | |
| "learning_rate": 0.00017547540983606558, | |
| "loss": 0.685, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 18.840756558877363, | |
| "grad_norm": 4.708593368530273, | |
| "learning_rate": 0.000175344262295082, | |
| "loss": 0.6824, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 18.93837705918243, | |
| "grad_norm": 3.8522069454193115, | |
| "learning_rate": 0.00017521311475409836, | |
| "loss": 0.6827, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 19.03599755948749, | |
| "grad_norm": 3.08516788482666, | |
| "learning_rate": 0.00017508196721311475, | |
| "loss": 0.6848, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 19.133618059792557, | |
| "grad_norm": 6.1903815269470215, | |
| "learning_rate": 0.00017495081967213116, | |
| "loss": 0.5924, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 19.23123856009762, | |
| "grad_norm": 4.409329891204834, | |
| "learning_rate": 0.00017481967213114753, | |
| "loss": 0.6484, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 19.328859060402685, | |
| "grad_norm": 3.1954550743103027, | |
| "learning_rate": 0.00017468852459016394, | |
| "loss": 0.6759, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 19.426479560707747, | |
| "grad_norm": 4.925632953643799, | |
| "learning_rate": 0.00017455737704918033, | |
| "loss": 0.6447, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 19.524100061012813, | |
| "grad_norm": 3.4807968139648438, | |
| "learning_rate": 0.00017442622950819675, | |
| "loss": 0.6905, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 19.621720561317876, | |
| "grad_norm": 3.031001567840576, | |
| "learning_rate": 0.0001742950819672131, | |
| "loss": 0.68, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 19.71934106162294, | |
| "grad_norm": 2.873664140701294, | |
| "learning_rate": 0.0001741639344262295, | |
| "loss": 0.6986, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 19.816961561928004, | |
| "grad_norm": 3.4913737773895264, | |
| "learning_rate": 0.00017403278688524592, | |
| "loss": 0.7319, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 19.91458206223307, | |
| "grad_norm": 4.207777976989746, | |
| "learning_rate": 0.0001739016393442623, | |
| "loss": 0.6479, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 20.012202562538132, | |
| "grad_norm": 2.822380781173706, | |
| "learning_rate": 0.0001737704918032787, | |
| "loss": 0.683, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 20.109823062843198, | |
| "grad_norm": 3.4512064456939697, | |
| "learning_rate": 0.00017363934426229509, | |
| "loss": 0.6329, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 20.20744356314826, | |
| "grad_norm": 4.552077293395996, | |
| "learning_rate": 0.0001735081967213115, | |
| "loss": 0.6622, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 20.305064063453326, | |
| "grad_norm": 3.7285406589508057, | |
| "learning_rate": 0.00017337704918032786, | |
| "loss": 0.6287, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 20.40268456375839, | |
| "grad_norm": 3.0254034996032715, | |
| "learning_rate": 0.00017324590163934425, | |
| "loss": 0.6696, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 20.500305064063454, | |
| "grad_norm": 3.474151372909546, | |
| "learning_rate": 0.00017311475409836067, | |
| "loss": 0.623, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 20.597925564368516, | |
| "grad_norm": 3.686966896057129, | |
| "learning_rate": 0.00017298360655737706, | |
| "loss": 0.7025, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 20.695546064673582, | |
| "grad_norm": 4.044944763183594, | |
| "learning_rate": 0.00017285245901639345, | |
| "loss": 0.6287, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 20.793166564978645, | |
| "grad_norm": 4.093704700469971, | |
| "learning_rate": 0.00017272131147540984, | |
| "loss": 0.6523, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 20.89078706528371, | |
| "grad_norm": 3.9666953086853027, | |
| "learning_rate": 0.00017259016393442625, | |
| "loss": 0.6513, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 20.988407565588773, | |
| "grad_norm": 3.888939142227173, | |
| "learning_rate": 0.00017245901639344264, | |
| "loss": 0.682, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 21.08602806589384, | |
| "grad_norm": 3.227346420288086, | |
| "learning_rate": 0.000172327868852459, | |
| "loss": 0.6342, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 21.1836485661989, | |
| "grad_norm": 2.9979476928710938, | |
| "learning_rate": 0.00017219672131147542, | |
| "loss": 0.6028, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 21.281269066503967, | |
| "grad_norm": 3.6194779872894287, | |
| "learning_rate": 0.0001720655737704918, | |
| "loss": 0.6294, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 21.37888956680903, | |
| "grad_norm": 4.482040882110596, | |
| "learning_rate": 0.0001719344262295082, | |
| "loss": 0.6283, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 21.476510067114095, | |
| "grad_norm": 3.0028133392333984, | |
| "learning_rate": 0.0001718032786885246, | |
| "loss": 0.6292, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 21.574130567419157, | |
| "grad_norm": 3.0038769245147705, | |
| "learning_rate": 0.00017167213114754098, | |
| "loss": 0.6349, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 21.671751067724223, | |
| "grad_norm": 2.854794502258301, | |
| "learning_rate": 0.0001715409836065574, | |
| "loss": 0.632, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 21.769371568029285, | |
| "grad_norm": 4.145231246948242, | |
| "learning_rate": 0.00017140983606557376, | |
| "loss": 0.6518, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 21.86699206833435, | |
| "grad_norm": 2.7917041778564453, | |
| "learning_rate": 0.00017127868852459018, | |
| "loss": 0.6595, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 21.964612568639414, | |
| "grad_norm": 2.858912706375122, | |
| "learning_rate": 0.00017114754098360656, | |
| "loss": 0.6507, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 22.06223306894448, | |
| "grad_norm": 4.860498428344727, | |
| "learning_rate": 0.00017101639344262298, | |
| "loss": 0.6025, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 22.15985356924954, | |
| "grad_norm": 3.04990291595459, | |
| "learning_rate": 0.00017088524590163934, | |
| "loss": 0.5384, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 22.257474069554608, | |
| "grad_norm": 3.768587827682495, | |
| "learning_rate": 0.00017075409836065573, | |
| "loss": 0.5799, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 22.35509456985967, | |
| "grad_norm": 3.870783805847168, | |
| "learning_rate": 0.00017062295081967215, | |
| "loss": 0.6453, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 22.452715070164736, | |
| "grad_norm": 4.49683952331543, | |
| "learning_rate": 0.0001704918032786885, | |
| "loss": 0.6728, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 22.550335570469798, | |
| "grad_norm": 3.2914700508117676, | |
| "learning_rate": 0.00017036065573770493, | |
| "loss": 0.6644, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 22.647956070774864, | |
| "grad_norm": 2.4408113956451416, | |
| "learning_rate": 0.00017022950819672132, | |
| "loss": 0.6034, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 22.745576571079926, | |
| "grad_norm": 4.22420072555542, | |
| "learning_rate": 0.00017009836065573773, | |
| "loss": 0.6464, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 22.843197071384992, | |
| "grad_norm": 3.422456979751587, | |
| "learning_rate": 0.0001699672131147541, | |
| "loss": 0.6017, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 22.940817571690054, | |
| "grad_norm": 2.6850616931915283, | |
| "learning_rate": 0.00016983606557377049, | |
| "loss": 0.6059, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 23.03843807199512, | |
| "grad_norm": 4.270930290222168, | |
| "learning_rate": 0.0001697049180327869, | |
| "loss": 0.6273, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 23.136058572300183, | |
| "grad_norm": 3.8276960849761963, | |
| "learning_rate": 0.0001695737704918033, | |
| "loss": 0.5779, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 23.23367907260525, | |
| "grad_norm": 4.058398723602295, | |
| "learning_rate": 0.00016944262295081968, | |
| "loss": 0.593, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 23.33129957291031, | |
| "grad_norm": 4.362659454345703, | |
| "learning_rate": 0.00016931147540983607, | |
| "loss": 0.5927, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 23.428920073215377, | |
| "grad_norm": 3.7719762325286865, | |
| "learning_rate": 0.00016918032786885249, | |
| "loss": 0.6487, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 23.52654057352044, | |
| "grad_norm": 2.9975759983062744, | |
| "learning_rate": 0.00016904918032786885, | |
| "loss": 0.614, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 23.624161073825505, | |
| "grad_norm": 3.0442936420440674, | |
| "learning_rate": 0.00016891803278688524, | |
| "loss": 0.5966, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 23.721781574130567, | |
| "grad_norm": 2.8749730587005615, | |
| "learning_rate": 0.00016878688524590165, | |
| "loss": 0.6166, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 23.819402074435633, | |
| "grad_norm": 3.6400644779205322, | |
| "learning_rate": 0.00016865573770491804, | |
| "loss": 0.6329, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 23.917022574740695, | |
| "grad_norm": 3.3705861568450928, | |
| "learning_rate": 0.00016852459016393443, | |
| "loss": 0.5753, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 24.01464307504576, | |
| "grad_norm": 2.9685709476470947, | |
| "learning_rate": 0.00016839344262295082, | |
| "loss": 0.6302, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 24.112263575350823, | |
| "grad_norm": 3.0799593925476074, | |
| "learning_rate": 0.0001682622950819672, | |
| "loss": 0.5936, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 24.20988407565589, | |
| "grad_norm": 2.598597764968872, | |
| "learning_rate": 0.00016813114754098363, | |
| "loss": 0.5836, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 24.30750457596095, | |
| "grad_norm": 3.719918966293335, | |
| "learning_rate": 0.000168, | |
| "loss": 0.5754, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 24.405125076266017, | |
| "grad_norm": 3.592268466949463, | |
| "learning_rate": 0.0001678688524590164, | |
| "loss": 0.5683, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 24.50274557657108, | |
| "grad_norm": 2.984259605407715, | |
| "learning_rate": 0.0001677377049180328, | |
| "loss": 0.5795, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 24.600366076876146, | |
| "grad_norm": 2.833623170852661, | |
| "learning_rate": 0.00016760655737704919, | |
| "loss": 0.618, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 24.697986577181208, | |
| "grad_norm": 5.3122172355651855, | |
| "learning_rate": 0.00016747540983606558, | |
| "loss": 0.5863, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 24.795607077486274, | |
| "grad_norm": 2.9093785285949707, | |
| "learning_rate": 0.00016734426229508196, | |
| "loss": 0.5863, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 24.893227577791336, | |
| "grad_norm": 2.8090052604675293, | |
| "learning_rate": 0.00016721311475409838, | |
| "loss": 0.6045, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 24.990848078096402, | |
| "grad_norm": 2.9333415031433105, | |
| "learning_rate": 0.00016708196721311474, | |
| "loss": 0.612, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 25.088468578401464, | |
| "grad_norm": 2.933706760406494, | |
| "learning_rate": 0.00016695081967213116, | |
| "loss": 0.5534, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 25.18608907870653, | |
| "grad_norm": 4.508391380310059, | |
| "learning_rate": 0.00016681967213114755, | |
| "loss": 0.5384, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 25.283709579011592, | |
| "grad_norm": 4.106027603149414, | |
| "learning_rate": 0.00016668852459016397, | |
| "loss": 0.5464, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 25.381330079316655, | |
| "grad_norm": 4.005650997161865, | |
| "learning_rate": 0.00016655737704918033, | |
| "loss": 0.5805, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 25.47895057962172, | |
| "grad_norm": 3.259774923324585, | |
| "learning_rate": 0.00016642622950819672, | |
| "loss": 0.5966, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 25.576571079926783, | |
| "grad_norm": 3.695301055908203, | |
| "learning_rate": 0.00016629508196721313, | |
| "loss": 0.5344, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 25.67419158023185, | |
| "grad_norm": 2.9240682125091553, | |
| "learning_rate": 0.0001661639344262295, | |
| "loss": 0.6109, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 25.77181208053691, | |
| "grad_norm": 5.290099620819092, | |
| "learning_rate": 0.0001660327868852459, | |
| "loss": 0.5831, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 25.869432580841977, | |
| "grad_norm": 3.4903934001922607, | |
| "learning_rate": 0.0001659016393442623, | |
| "loss": 0.6106, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 25.96705308114704, | |
| "grad_norm": 4.101973533630371, | |
| "learning_rate": 0.0001657704918032787, | |
| "loss": 0.6319, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 26.064673581452105, | |
| "grad_norm": 3.657115936279297, | |
| "learning_rate": 0.00016563934426229508, | |
| "loss": 0.5218, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 26.162294081757167, | |
| "grad_norm": 3.4918181896209717, | |
| "learning_rate": 0.00016550819672131147, | |
| "loss": 0.5848, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 26.259914582062233, | |
| "grad_norm": 3.117476224899292, | |
| "learning_rate": 0.00016537704918032789, | |
| "loss": 0.5579, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 26.357535082367296, | |
| "grad_norm": 4.165419578552246, | |
| "learning_rate": 0.00016524590163934428, | |
| "loss": 0.535, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 26.45515558267236, | |
| "grad_norm": 4.682600021362305, | |
| "learning_rate": 0.00016511475409836067, | |
| "loss": 0.6043, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 26.552776082977424, | |
| "grad_norm": 3.7358362674713135, | |
| "learning_rate": 0.00016498360655737705, | |
| "loss": 0.5605, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 26.65039658328249, | |
| "grad_norm": 3.1440377235412598, | |
| "learning_rate": 0.00016485245901639344, | |
| "loss": 0.5344, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 26.748017083587552, | |
| "grad_norm": 4.453829288482666, | |
| "learning_rate": 0.00016472131147540983, | |
| "loss": 0.564, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 26.845637583892618, | |
| "grad_norm": 3.959137439727783, | |
| "learning_rate": 0.00016459016393442622, | |
| "loss": 0.5986, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 26.94325808419768, | |
| "grad_norm": 3.9074790477752686, | |
| "learning_rate": 0.00016445901639344264, | |
| "loss": 0.5789, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 27.040878584502746, | |
| "grad_norm": 2.9816102981567383, | |
| "learning_rate": 0.00016432786885245903, | |
| "loss": 0.5651, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 27.13849908480781, | |
| "grad_norm": 2.41890549659729, | |
| "learning_rate": 0.00016419672131147542, | |
| "loss": 0.5405, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 27.236119585112874, | |
| "grad_norm": 3.1744871139526367, | |
| "learning_rate": 0.0001640655737704918, | |
| "loss": 0.5646, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 27.333740085417936, | |
| "grad_norm": 3.6110246181488037, | |
| "learning_rate": 0.0001639344262295082, | |
| "loss": 0.5532, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 27.431360585723002, | |
| "grad_norm": 4.252525806427002, | |
| "learning_rate": 0.0001638032786885246, | |
| "loss": 0.5243, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 27.528981086028065, | |
| "grad_norm": 3.710491895675659, | |
| "learning_rate": 0.00016367213114754098, | |
| "loss": 0.57, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 27.62660158633313, | |
| "grad_norm": 3.5450077056884766, | |
| "learning_rate": 0.0001635409836065574, | |
| "loss": 0.5401, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 27.724222086638193, | |
| "grad_norm": 3.5488388538360596, | |
| "learning_rate": 0.00016340983606557378, | |
| "loss": 0.5345, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 27.82184258694326, | |
| "grad_norm": 4.381565570831299, | |
| "learning_rate": 0.00016327868852459017, | |
| "loss": 0.5362, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 27.91946308724832, | |
| "grad_norm": 4.077484130859375, | |
| "learning_rate": 0.00016314754098360656, | |
| "loss": 0.6039, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 28.017083587553387, | |
| "grad_norm": 3.217458486557007, | |
| "learning_rate": 0.00016301639344262295, | |
| "loss": 0.6027, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 28.11470408785845, | |
| "grad_norm": 3.741176128387451, | |
| "learning_rate": 0.00016288524590163937, | |
| "loss": 0.4955, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 28.212324588163515, | |
| "grad_norm": 4.43143892288208, | |
| "learning_rate": 0.00016275409836065573, | |
| "loss": 0.5326, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 28.309945088468577, | |
| "grad_norm": 3.040767192840576, | |
| "learning_rate": 0.00016262295081967214, | |
| "loss": 0.5207, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 28.407565588773643, | |
| "grad_norm": 3.47582745552063, | |
| "learning_rate": 0.00016249180327868853, | |
| "loss": 0.5524, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 28.505186089078705, | |
| "grad_norm": 4.133968830108643, | |
| "learning_rate": 0.00016236065573770492, | |
| "loss": 0.5553, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 28.60280658938377, | |
| "grad_norm": 3.2506773471832275, | |
| "learning_rate": 0.0001622295081967213, | |
| "loss": 0.5287, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 28.700427089688834, | |
| "grad_norm": 4.527762413024902, | |
| "learning_rate": 0.0001620983606557377, | |
| "loss": 0.507, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 28.7980475899939, | |
| "grad_norm": 4.723377704620361, | |
| "learning_rate": 0.00016196721311475412, | |
| "loss": 0.6089, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 28.89566809029896, | |
| "grad_norm": 3.6172468662261963, | |
| "learning_rate": 0.00016183606557377048, | |
| "loss": 0.542, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 28.993288590604028, | |
| "grad_norm": 3.735133647918701, | |
| "learning_rate": 0.0001617049180327869, | |
| "loss": 0.5656, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 29.09090909090909, | |
| "grad_norm": 5.235744476318359, | |
| "learning_rate": 0.0001615737704918033, | |
| "loss": 0.5355, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 29.188529591214156, | |
| "grad_norm": 4.541784763336182, | |
| "learning_rate": 0.00016144262295081968, | |
| "loss": 0.5122, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 29.286150091519218, | |
| "grad_norm": 3.0607311725616455, | |
| "learning_rate": 0.00016131147540983607, | |
| "loss": 0.5047, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 29.383770591824284, | |
| "grad_norm": 4.826447486877441, | |
| "learning_rate": 0.00016118032786885245, | |
| "loss": 0.5077, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 29.481391092129346, | |
| "grad_norm": 3.6418137550354004, | |
| "learning_rate": 0.00016104918032786887, | |
| "loss": 0.5401, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 29.579011592434412, | |
| "grad_norm": 6.086111545562744, | |
| "learning_rate": 0.00016091803278688526, | |
| "loss": 0.5311, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 29.676632092739474, | |
| "grad_norm": 2.81947922706604, | |
| "learning_rate": 0.00016078688524590165, | |
| "loss": 0.523, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 29.77425259304454, | |
| "grad_norm": 4.2681355476379395, | |
| "learning_rate": 0.00016065573770491804, | |
| "loss": 0.5439, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 29.871873093349603, | |
| "grad_norm": 3.6135213375091553, | |
| "learning_rate": 0.00016052459016393443, | |
| "loss": 0.5177, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 29.96949359365467, | |
| "grad_norm": 3.3115713596343994, | |
| "learning_rate": 0.00016039344262295082, | |
| "loss": 0.581, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 30.06711409395973, | |
| "grad_norm": 3.5007998943328857, | |
| "learning_rate": 0.0001602622950819672, | |
| "loss": 0.5092, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 30.164734594264797, | |
| "grad_norm": 2.9520623683929443, | |
| "learning_rate": 0.00016013114754098362, | |
| "loss": 0.4636, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 30.26235509456986, | |
| "grad_norm": 3.135349750518799, | |
| "learning_rate": 0.00016, | |
| "loss": 0.5391, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 30.359975594874925, | |
| "grad_norm": 4.124795913696289, | |
| "learning_rate": 0.0001598688524590164, | |
| "loss": 0.5185, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 30.457596095179987, | |
| "grad_norm": 3.6246888637542725, | |
| "learning_rate": 0.0001597377049180328, | |
| "loss": 0.532, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 30.555216595485053, | |
| "grad_norm": 3.1558213233947754, | |
| "learning_rate": 0.00015960655737704918, | |
| "loss": 0.5261, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 30.652837095790115, | |
| "grad_norm": 3.2610013484954834, | |
| "learning_rate": 0.0001594754098360656, | |
| "loss": 0.5104, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 30.75045759609518, | |
| "grad_norm": 3.8648669719696045, | |
| "learning_rate": 0.00015934426229508196, | |
| "loss": 0.538, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 30.848078096400243, | |
| "grad_norm": 3.0972094535827637, | |
| "learning_rate": 0.00015921311475409838, | |
| "loss": 0.5362, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 30.94569859670531, | |
| "grad_norm": 4.238556385040283, | |
| "learning_rate": 0.00015908196721311477, | |
| "loss": 0.54, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 31.04331909701037, | |
| "grad_norm": 4.820079326629639, | |
| "learning_rate": 0.00015895081967213116, | |
| "loss": 0.4893, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 31.140939597315437, | |
| "grad_norm": 3.2895243167877197, | |
| "learning_rate": 0.00015881967213114754, | |
| "loss": 0.4502, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 31.2385600976205, | |
| "grad_norm": 3.5625295639038086, | |
| "learning_rate": 0.00015868852459016393, | |
| "loss": 0.5033, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 31.336180597925566, | |
| "grad_norm": 3.8144872188568115, | |
| "learning_rate": 0.00015855737704918035, | |
| "loss": 0.515, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 31.433801098230628, | |
| "grad_norm": 4.3353271484375, | |
| "learning_rate": 0.0001584262295081967, | |
| "loss": 0.5356, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 31.531421598535694, | |
| "grad_norm": 4.205932140350342, | |
| "learning_rate": 0.00015829508196721313, | |
| "loss": 0.5295, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 31.629042098840756, | |
| "grad_norm": 4.171141147613525, | |
| "learning_rate": 0.00015816393442622952, | |
| "loss": 0.4502, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 31.726662599145822, | |
| "grad_norm": 3.7091734409332275, | |
| "learning_rate": 0.0001580327868852459, | |
| "loss": 0.4885, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 31.824283099450884, | |
| "grad_norm": 3.9756462574005127, | |
| "learning_rate": 0.0001579016393442623, | |
| "loss": 0.546, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 31.92190359975595, | |
| "grad_norm": 4.506149768829346, | |
| "learning_rate": 0.0001577704918032787, | |
| "loss": 0.513, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 32.01952410006101, | |
| "grad_norm": 3.1480486392974854, | |
| "learning_rate": 0.0001576393442622951, | |
| "loss": 0.5799, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 32.117144600366075, | |
| "grad_norm": 2.476172685623169, | |
| "learning_rate": 0.00015750819672131147, | |
| "loss": 0.455, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 32.214765100671144, | |
| "grad_norm": 3.0843114852905273, | |
| "learning_rate": 0.00015737704918032788, | |
| "loss": 0.4701, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 32.31238560097621, | |
| "grad_norm": 3.4512338638305664, | |
| "learning_rate": 0.00015724590163934427, | |
| "loss": 0.4908, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 32.41000610128127, | |
| "grad_norm": 3.0433006286621094, | |
| "learning_rate": 0.00015711475409836066, | |
| "loss": 0.4857, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 32.50762660158633, | |
| "grad_norm": 3.1231653690338135, | |
| "learning_rate": 0.00015698360655737705, | |
| "loss": 0.5053, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 32.6052471018914, | |
| "grad_norm": 3.121457576751709, | |
| "learning_rate": 0.00015685245901639344, | |
| "loss": 0.4962, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 32.70286760219646, | |
| "grad_norm": 3.6397838592529297, | |
| "learning_rate": 0.00015672131147540986, | |
| "loss": 0.5083, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 32.800488102501525, | |
| "grad_norm": 4.634298801422119, | |
| "learning_rate": 0.00015659016393442622, | |
| "loss": 0.5268, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 32.89810860280659, | |
| "grad_norm": 3.6741957664489746, | |
| "learning_rate": 0.00015645901639344263, | |
| "loss": 0.5518, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 32.99572910311166, | |
| "grad_norm": 3.7481977939605713, | |
| "learning_rate": 0.00015632786885245902, | |
| "loss": 0.5242, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 33.09334960341672, | |
| "grad_norm": 3.2336630821228027, | |
| "learning_rate": 0.0001561967213114754, | |
| "loss": 0.4854, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 33.19097010372178, | |
| "grad_norm": 3.153439521789551, | |
| "learning_rate": 0.0001560655737704918, | |
| "loss": 0.4512, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 33.288590604026844, | |
| "grad_norm": 4.410555839538574, | |
| "learning_rate": 0.0001559344262295082, | |
| "loss": 0.4135, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 33.38621110433191, | |
| "grad_norm": 4.749181747436523, | |
| "learning_rate": 0.0001558032786885246, | |
| "loss": 0.5023, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 33.483831604636975, | |
| "grad_norm": 5.4415459632873535, | |
| "learning_rate": 0.000155672131147541, | |
| "loss": 0.4793, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 33.58145210494204, | |
| "grad_norm": 4.493134498596191, | |
| "learning_rate": 0.0001555409836065574, | |
| "loss": 0.4827, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 33.6790726052471, | |
| "grad_norm": 5.345827579498291, | |
| "learning_rate": 0.00015540983606557378, | |
| "loss": 0.4878, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 33.77669310555217, | |
| "grad_norm": 5.483647346496582, | |
| "learning_rate": 0.00015527868852459017, | |
| "loss": 0.4976, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 33.87431360585723, | |
| "grad_norm": 3.2896759510040283, | |
| "learning_rate": 0.00015514754098360656, | |
| "loss": 0.5163, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 33.971934106162294, | |
| "grad_norm": 3.426711082458496, | |
| "learning_rate": 0.00015501639344262294, | |
| "loss": 0.6068, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 34.06955460646736, | |
| "grad_norm": 5.182300567626953, | |
| "learning_rate": 0.00015488524590163936, | |
| "loss": 0.4459, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 34.16717510677242, | |
| "grad_norm": 3.9542787075042725, | |
| "learning_rate": 0.00015475409836065575, | |
| "loss": 0.4688, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 34.26479560707749, | |
| "grad_norm": 3.966188669204712, | |
| "learning_rate": 0.00015462295081967214, | |
| "loss": 0.4913, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 34.36241610738255, | |
| "grad_norm": 3.175008535385132, | |
| "learning_rate": 0.00015449180327868853, | |
| "loss": 0.4564, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 34.46003660768761, | |
| "grad_norm": 5.1779584884643555, | |
| "learning_rate": 0.00015436065573770492, | |
| "loss": 0.4807, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 34.557657107992675, | |
| "grad_norm": 3.4940545558929443, | |
| "learning_rate": 0.00015422950819672133, | |
| "loss": 0.4948, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 34.655277608297745, | |
| "grad_norm": 4.1444196701049805, | |
| "learning_rate": 0.0001540983606557377, | |
| "loss": 0.4756, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 34.75289810860281, | |
| "grad_norm": 5.219560146331787, | |
| "learning_rate": 0.00015396721311475411, | |
| "loss": 0.5207, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 34.85051860890787, | |
| "grad_norm": 3.7527666091918945, | |
| "learning_rate": 0.0001538360655737705, | |
| "loss": 0.484, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 34.94813910921293, | |
| "grad_norm": 3.0565104484558105, | |
| "learning_rate": 0.0001537049180327869, | |
| "loss": 0.4795, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 35.045759609518, | |
| "grad_norm": 2.8091022968292236, | |
| "learning_rate": 0.00015357377049180328, | |
| "loss": 0.4657, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 35.14338010982306, | |
| "grad_norm": 2.8542001247406006, | |
| "learning_rate": 0.00015344262295081967, | |
| "loss": 0.4532, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 35.241000610128125, | |
| "grad_norm": 3.4882893562316895, | |
| "learning_rate": 0.0001533114754098361, | |
| "loss": 0.4629, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 35.33862111043319, | |
| "grad_norm": 3.3786075115203857, | |
| "learning_rate": 0.00015318032786885245, | |
| "loss": 0.4388, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 35.43624161073826, | |
| "grad_norm": 4.602392673492432, | |
| "learning_rate": 0.00015304918032786887, | |
| "loss": 0.4788, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 35.53386211104332, | |
| "grad_norm": 3.4427242279052734, | |
| "learning_rate": 0.00015291803278688526, | |
| "loss": 0.474, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 35.63148261134838, | |
| "grad_norm": 3.069155216217041, | |
| "learning_rate": 0.00015278688524590165, | |
| "loss": 0.4385, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 35.729103111653444, | |
| "grad_norm": 3.971026659011841, | |
| "learning_rate": 0.00015265573770491803, | |
| "loss": 0.5036, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 35.82672361195851, | |
| "grad_norm": 3.3750109672546387, | |
| "learning_rate": 0.00015252459016393442, | |
| "loss": 0.4957, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 35.924344112263576, | |
| "grad_norm": 3.382016181945801, | |
| "learning_rate": 0.00015239344262295084, | |
| "loss": 0.5176, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 36.02196461256864, | |
| "grad_norm": 2.5936732292175293, | |
| "learning_rate": 0.0001522622950819672, | |
| "loss": 0.4807, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 36.1195851128737, | |
| "grad_norm": 3.9948599338531494, | |
| "learning_rate": 0.00015213114754098362, | |
| "loss": 0.4285, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 36.21720561317877, | |
| "grad_norm": 3.3358399868011475, | |
| "learning_rate": 0.000152, | |
| "loss": 0.5013, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 36.31482611348383, | |
| "grad_norm": 3.7922635078430176, | |
| "learning_rate": 0.0001518688524590164, | |
| "loss": 0.4572, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 36.412446613788894, | |
| "grad_norm": 2.7253546714782715, | |
| "learning_rate": 0.0001517377049180328, | |
| "loss": 0.4151, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 36.51006711409396, | |
| "grad_norm": 5.344095706939697, | |
| "learning_rate": 0.00015160655737704918, | |
| "loss": 0.4718, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 36.607687614399026, | |
| "grad_norm": 3.768033981323242, | |
| "learning_rate": 0.0001514754098360656, | |
| "loss": 0.4406, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 36.70530811470409, | |
| "grad_norm": 3.884988784790039, | |
| "learning_rate": 0.00015134426229508198, | |
| "loss": 0.4605, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 36.80292861500915, | |
| "grad_norm": 4.368161678314209, | |
| "learning_rate": 0.00015121311475409837, | |
| "loss": 0.4609, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 36.90054911531421, | |
| "grad_norm": 5.337547779083252, | |
| "learning_rate": 0.00015108196721311476, | |
| "loss": 0.5078, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 36.99816961561928, | |
| "grad_norm": 3.7230191230773926, | |
| "learning_rate": 0.00015095081967213115, | |
| "loss": 0.5003, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 37.095790115924345, | |
| "grad_norm": 2.9239673614501953, | |
| "learning_rate": 0.00015081967213114754, | |
| "loss": 0.4312, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 37.19341061622941, | |
| "grad_norm": 2.9180755615234375, | |
| "learning_rate": 0.00015068852459016393, | |
| "loss": 0.4876, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 37.29103111653447, | |
| "grad_norm": 3.115180253982544, | |
| "learning_rate": 0.00015055737704918035, | |
| "loss": 0.4507, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 37.38865161683954, | |
| "grad_norm": 4.005005359649658, | |
| "learning_rate": 0.00015042622950819673, | |
| "loss": 0.458, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 37.4862721171446, | |
| "grad_norm": 3.3003063201904297, | |
| "learning_rate": 0.00015029508196721312, | |
| "loss": 0.4805, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 37.58389261744966, | |
| "grad_norm": 2.783266544342041, | |
| "learning_rate": 0.00015016393442622951, | |
| "loss": 0.4742, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 37.681513117754726, | |
| "grad_norm": 3.3052291870117188, | |
| "learning_rate": 0.0001500327868852459, | |
| "loss": 0.4415, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 37.779133618059795, | |
| "grad_norm": 4.352939605712891, | |
| "learning_rate": 0.00014990163934426232, | |
| "loss": 0.4489, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 37.87675411836486, | |
| "grad_norm": 2.9084181785583496, | |
| "learning_rate": 0.00014977049180327868, | |
| "loss": 0.439, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 37.97437461866992, | |
| "grad_norm": 3.3146231174468994, | |
| "learning_rate": 0.0001496393442622951, | |
| "loss": 0.4568, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 38.07199511897498, | |
| "grad_norm": 3.5614709854125977, | |
| "learning_rate": 0.0001495081967213115, | |
| "loss": 0.4648, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 38.16961561928005, | |
| "grad_norm": 3.782663345336914, | |
| "learning_rate": 0.00014937704918032788, | |
| "loss": 0.4636, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 38.267236119585114, | |
| "grad_norm": 3.2751007080078125, | |
| "learning_rate": 0.00014924590163934427, | |
| "loss": 0.4506, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 38.364856619890176, | |
| "grad_norm": 4.198697090148926, | |
| "learning_rate": 0.00014911475409836066, | |
| "loss": 0.4271, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 38.46247712019524, | |
| "grad_norm": 3.5415964126586914, | |
| "learning_rate": 0.00014898360655737707, | |
| "loss": 0.4112, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 38.56009762050031, | |
| "grad_norm": 3.3707010746002197, | |
| "learning_rate": 0.00014885245901639343, | |
| "loss": 0.4788, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 38.65771812080537, | |
| "grad_norm": 3.5206825733184814, | |
| "learning_rate": 0.00014872131147540985, | |
| "loss": 0.4456, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 38.75533862111043, | |
| "grad_norm": 3.870487689971924, | |
| "learning_rate": 0.00014859016393442624, | |
| "loss": 0.4346, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 38.852959121415495, | |
| "grad_norm": 4.031324863433838, | |
| "learning_rate": 0.00014845901639344263, | |
| "loss": 0.4272, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 38.950579621720564, | |
| "grad_norm": 3.552272081375122, | |
| "learning_rate": 0.00014832786885245902, | |
| "loss": 0.455, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 39.04820012202563, | |
| "grad_norm": 4.429355621337891, | |
| "learning_rate": 0.0001481967213114754, | |
| "loss": 0.474, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 39.14582062233069, | |
| "grad_norm": 2.7380106449127197, | |
| "learning_rate": 0.00014806557377049182, | |
| "loss": 0.3892, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 39.24344112263575, | |
| "grad_norm": 3.533512830734253, | |
| "learning_rate": 0.0001479344262295082, | |
| "loss": 0.4225, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 39.34106162294082, | |
| "grad_norm": 4.216946125030518, | |
| "learning_rate": 0.0001478032786885246, | |
| "loss": 0.4288, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 39.43868212324588, | |
| "grad_norm": 4.304781436920166, | |
| "learning_rate": 0.000147672131147541, | |
| "loss": 0.4678, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 39.536302623550945, | |
| "grad_norm": 3.1035361289978027, | |
| "learning_rate": 0.00014754098360655738, | |
| "loss": 0.436, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 39.63392312385601, | |
| "grad_norm": 4.312572956085205, | |
| "learning_rate": 0.00014740983606557377, | |
| "loss": 0.4418, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 39.73154362416108, | |
| "grad_norm": 4.513503074645996, | |
| "learning_rate": 0.00014727868852459016, | |
| "loss": 0.4613, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 39.82916412446614, | |
| "grad_norm": 3.7922120094299316, | |
| "learning_rate": 0.00014714754098360658, | |
| "loss": 0.4339, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 39.9267846247712, | |
| "grad_norm": 4.1964616775512695, | |
| "learning_rate": 0.00014701639344262297, | |
| "loss": 0.4799, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 40.024405125076264, | |
| "grad_norm": 3.0314066410064697, | |
| "learning_rate": 0.00014688524590163936, | |
| "loss": 0.4324, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 40.12202562538133, | |
| "grad_norm": 3.27500319480896, | |
| "learning_rate": 0.00014675409836065575, | |
| "loss": 0.4353, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 40.219646125686396, | |
| "grad_norm": 3.132371664047241, | |
| "learning_rate": 0.00014662295081967214, | |
| "loss": 0.4221, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 40.31726662599146, | |
| "grad_norm": 4.715281009674072, | |
| "learning_rate": 0.00014649180327868852, | |
| "loss": 0.4344, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 40.41488712629652, | |
| "grad_norm": 3.406825065612793, | |
| "learning_rate": 0.00014636065573770491, | |
| "loss": 0.4362, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 40.51250762660159, | |
| "grad_norm": 3.668811082839966, | |
| "learning_rate": 0.00014622950819672133, | |
| "loss": 0.4217, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 40.61012812690665, | |
| "grad_norm": 4.667581081390381, | |
| "learning_rate": 0.00014609836065573772, | |
| "loss": 0.4115, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 40.707748627211714, | |
| "grad_norm": 4.834279537200928, | |
| "learning_rate": 0.0001459672131147541, | |
| "loss": 0.4292, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 40.80536912751678, | |
| "grad_norm": 4.528176307678223, | |
| "learning_rate": 0.0001458360655737705, | |
| "loss": 0.4521, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 40.902989627821846, | |
| "grad_norm": 4.21350622177124, | |
| "learning_rate": 0.0001457049180327869, | |
| "loss": 0.4593, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 41.00061012812691, | |
| "grad_norm": 3.694138526916504, | |
| "learning_rate": 0.0001455737704918033, | |
| "loss": 0.434, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 41.09823062843197, | |
| "grad_norm": 3.2935163974761963, | |
| "learning_rate": 0.00014544262295081967, | |
| "loss": 0.3825, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 41.19585112873703, | |
| "grad_norm": 4.162698745727539, | |
| "learning_rate": 0.00014531147540983608, | |
| "loss": 0.3929, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 41.2934716290421, | |
| "grad_norm": 3.650602340698242, | |
| "learning_rate": 0.00014518032786885247, | |
| "loss": 0.4134, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 41.391092129347165, | |
| "grad_norm": 4.435211181640625, | |
| "learning_rate": 0.00014504918032786886, | |
| "loss": 0.4438, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 41.48871262965223, | |
| "grad_norm": 3.3488500118255615, | |
| "learning_rate": 0.00014491803278688525, | |
| "loss": 0.3997, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 41.58633312995729, | |
| "grad_norm": 3.115065574645996, | |
| "learning_rate": 0.00014478688524590164, | |
| "loss": 0.4443, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 41.68395363026236, | |
| "grad_norm": 3.3622965812683105, | |
| "learning_rate": 0.00014465573770491806, | |
| "loss": 0.4335, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 41.78157413056742, | |
| "grad_norm": 3.420353889465332, | |
| "learning_rate": 0.00014452459016393442, | |
| "loss": 0.4868, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 41.87919463087248, | |
| "grad_norm": 4.102779388427734, | |
| "learning_rate": 0.00014439344262295084, | |
| "loss": 0.4201, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 41.976815131177545, | |
| "grad_norm": 3.436736583709717, | |
| "learning_rate": 0.00014426229508196722, | |
| "loss": 0.4527, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 42.074435631482615, | |
| "grad_norm": 3.5886449813842773, | |
| "learning_rate": 0.00014413114754098361, | |
| "loss": 0.4072, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 42.17205613178768, | |
| "grad_norm": 3.397456645965576, | |
| "learning_rate": 0.000144, | |
| "loss": 0.3978, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 42.26967663209274, | |
| "grad_norm": 3.1985559463500977, | |
| "learning_rate": 0.0001438688524590164, | |
| "loss": 0.4335, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 42.3672971323978, | |
| "grad_norm": 3.780562400817871, | |
| "learning_rate": 0.0001437377049180328, | |
| "loss": 0.3899, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 42.464917632702864, | |
| "grad_norm": 3.2960257530212402, | |
| "learning_rate": 0.00014360655737704917, | |
| "loss": 0.4167, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 42.56253813300793, | |
| "grad_norm": 3.9879534244537354, | |
| "learning_rate": 0.0001434754098360656, | |
| "loss": 0.3799, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 42.660158633312996, | |
| "grad_norm": 4.146468162536621, | |
| "learning_rate": 0.00014334426229508198, | |
| "loss": 0.4341, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 42.75777913361806, | |
| "grad_norm": 3.994354724884033, | |
| "learning_rate": 0.00014321311475409837, | |
| "loss": 0.458, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 42.85539963392312, | |
| "grad_norm": 3.043018341064453, | |
| "learning_rate": 0.00014308196721311476, | |
| "loss": 0.4277, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 42.95302013422819, | |
| "grad_norm": 3.4882431030273438, | |
| "learning_rate": 0.00014295081967213115, | |
| "loss": 0.4491, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 43.05064063453325, | |
| "grad_norm": 3.6433932781219482, | |
| "learning_rate": 0.00014281967213114756, | |
| "loss": 0.4009, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 43.148261134838314, | |
| "grad_norm": 3.4401466846466064, | |
| "learning_rate": 0.00014268852459016395, | |
| "loss": 0.4191, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 43.24588163514338, | |
| "grad_norm": 3.963707685470581, | |
| "learning_rate": 0.00014255737704918034, | |
| "loss": 0.3961, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 43.343502135448446, | |
| "grad_norm": 3.0136444568634033, | |
| "learning_rate": 0.00014242622950819673, | |
| "loss": 0.4052, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 43.44112263575351, | |
| "grad_norm": 3.807661533355713, | |
| "learning_rate": 0.00014229508196721312, | |
| "loss": 0.3929, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 43.53874313605857, | |
| "grad_norm": 3.722825765609741, | |
| "learning_rate": 0.0001421639344262295, | |
| "loss": 0.4183, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 43.63636363636363, | |
| "grad_norm": 3.470569610595703, | |
| "learning_rate": 0.0001420327868852459, | |
| "loss": 0.4169, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 43.7339841366687, | |
| "grad_norm": 3.4966650009155273, | |
| "learning_rate": 0.00014190163934426231, | |
| "loss": 0.4121, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 43.831604636973765, | |
| "grad_norm": 4.7074174880981445, | |
| "learning_rate": 0.0001417704918032787, | |
| "loss": 0.4634, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 43.92922513727883, | |
| "grad_norm": 3.609379291534424, | |
| "learning_rate": 0.0001416393442622951, | |
| "loss": 0.4405, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 44.02684563758389, | |
| "grad_norm": 3.337123394012451, | |
| "learning_rate": 0.00014150819672131148, | |
| "loss": 0.3883, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 44.12446613788896, | |
| "grad_norm": 3.321878433227539, | |
| "learning_rate": 0.00014137704918032787, | |
| "loss": 0.4024, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 44.22208663819402, | |
| "grad_norm": 3.427896738052368, | |
| "learning_rate": 0.00014124590163934426, | |
| "loss": 0.4032, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 44.31970713849908, | |
| "grad_norm": 4.160934925079346, | |
| "learning_rate": 0.00014111475409836065, | |
| "loss": 0.3718, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 44.417327638804146, | |
| "grad_norm": 3.591139316558838, | |
| "learning_rate": 0.00014098360655737707, | |
| "loss": 0.3829, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 44.514948139109215, | |
| "grad_norm": 2.9896583557128906, | |
| "learning_rate": 0.00014085245901639346, | |
| "loss": 0.4231, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 44.61256863941428, | |
| "grad_norm": 4.670585632324219, | |
| "learning_rate": 0.00014072131147540985, | |
| "loss": 0.4285, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 44.71018913971934, | |
| "grad_norm": 4.087858200073242, | |
| "learning_rate": 0.00014059016393442624, | |
| "loss": 0.4061, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 44.8078096400244, | |
| "grad_norm": 4.61581563949585, | |
| "learning_rate": 0.00014045901639344262, | |
| "loss": 0.4245, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 44.90543014032947, | |
| "grad_norm": 4.715487957000732, | |
| "learning_rate": 0.00014032786885245904, | |
| "loss": 0.4223, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 45.003050640634534, | |
| "grad_norm": 3.8034169673919678, | |
| "learning_rate": 0.0001401967213114754, | |
| "loss": 0.407, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 45.100671140939596, | |
| "grad_norm": 2.813342332839966, | |
| "learning_rate": 0.00014006557377049182, | |
| "loss": 0.3936, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 45.19829164124466, | |
| "grad_norm": 3.130986452102661, | |
| "learning_rate": 0.0001399344262295082, | |
| "loss": 0.3897, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 45.29591214154973, | |
| "grad_norm": 3.6343700885772705, | |
| "learning_rate": 0.0001398032786885246, | |
| "loss": 0.3611, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 45.39353264185479, | |
| "grad_norm": 3.8157641887664795, | |
| "learning_rate": 0.000139672131147541, | |
| "loss": 0.419, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 45.49115314215985, | |
| "grad_norm": 3.9056179523468018, | |
| "learning_rate": 0.00013954098360655738, | |
| "loss": 0.3746, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 45.588773642464915, | |
| "grad_norm": 3.952216625213623, | |
| "learning_rate": 0.0001394098360655738, | |
| "loss": 0.3876, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 45.686394142769984, | |
| "grad_norm": 3.924835443496704, | |
| "learning_rate": 0.00013927868852459016, | |
| "loss": 0.4199, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 45.78401464307505, | |
| "grad_norm": 4.389228343963623, | |
| "learning_rate": 0.00013914754098360657, | |
| "loss": 0.3949, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 45.88163514338011, | |
| "grad_norm": 3.311279296875, | |
| "learning_rate": 0.00013901639344262296, | |
| "loss": 0.408, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 45.97925564368517, | |
| "grad_norm": 3.3349342346191406, | |
| "learning_rate": 0.00013888524590163935, | |
| "loss": 0.4578, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 46.07687614399024, | |
| "grad_norm": 2.8730790615081787, | |
| "learning_rate": 0.00013875409836065574, | |
| "loss": 0.3864, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 46.1744966442953, | |
| "grad_norm": 5.034847259521484, | |
| "learning_rate": 0.00013862295081967213, | |
| "loss": 0.3707, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 46.272117144600365, | |
| "grad_norm": 4.460078716278076, | |
| "learning_rate": 0.00013849180327868855, | |
| "loss": 0.3823, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 46.36973764490543, | |
| "grad_norm": 3.5436556339263916, | |
| "learning_rate": 0.0001383606557377049, | |
| "loss": 0.4063, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 46.4673581452105, | |
| "grad_norm": 3.1509130001068115, | |
| "learning_rate": 0.00013822950819672133, | |
| "loss": 0.3805, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 46.56497864551556, | |
| "grad_norm": 3.3589935302734375, | |
| "learning_rate": 0.00013809836065573771, | |
| "loss": 0.4332, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 46.66259914582062, | |
| "grad_norm": 3.7478573322296143, | |
| "learning_rate": 0.0001379672131147541, | |
| "loss": 0.3721, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 46.760219646125684, | |
| "grad_norm": 3.930676221847534, | |
| "learning_rate": 0.0001378360655737705, | |
| "loss": 0.3847, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 46.85784014643075, | |
| "grad_norm": 4.218331336975098, | |
| "learning_rate": 0.00013770491803278688, | |
| "loss": 0.4248, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 46.955460646735816, | |
| "grad_norm": 3.993345022201538, | |
| "learning_rate": 0.0001375737704918033, | |
| "loss": 0.3976, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 47.05308114704088, | |
| "grad_norm": 3.4169094562530518, | |
| "learning_rate": 0.0001374426229508197, | |
| "loss": 0.4126, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 47.15070164734594, | |
| "grad_norm": 3.3602797985076904, | |
| "learning_rate": 0.00013731147540983608, | |
| "loss": 0.3675, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 47.24832214765101, | |
| "grad_norm": 3.6925911903381348, | |
| "learning_rate": 0.00013718032786885247, | |
| "loss": 0.3732, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 47.34594264795607, | |
| "grad_norm": 3.3294148445129395, | |
| "learning_rate": 0.00013704918032786886, | |
| "loss": 0.3576, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 47.443563148261134, | |
| "grad_norm": 4.107402801513672, | |
| "learning_rate": 0.00013691803278688525, | |
| "loss": 0.376, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 47.5411836485662, | |
| "grad_norm": 3.5636045932769775, | |
| "learning_rate": 0.00013678688524590164, | |
| "loss": 0.3918, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 47.638804148871266, | |
| "grad_norm": 4.105456829071045, | |
| "learning_rate": 0.00013665573770491805, | |
| "loss": 0.3864, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 47.73642464917633, | |
| "grad_norm": 3.6053054332733154, | |
| "learning_rate": 0.00013652459016393444, | |
| "loss": 0.4009, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 47.83404514948139, | |
| "grad_norm": 4.397184371948242, | |
| "learning_rate": 0.00013639344262295083, | |
| "loss": 0.4094, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 47.93166564978645, | |
| "grad_norm": 3.121466875076294, | |
| "learning_rate": 0.00013626229508196722, | |
| "loss": 0.4276, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 48.02928615009152, | |
| "grad_norm": 3.9471912384033203, | |
| "learning_rate": 0.0001361311475409836, | |
| "loss": 0.3981, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 48.126906650396585, | |
| "grad_norm": 2.690577983856201, | |
| "learning_rate": 0.00013600000000000003, | |
| "loss": 0.3281, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 48.22452715070165, | |
| "grad_norm": 2.87288498878479, | |
| "learning_rate": 0.0001358688524590164, | |
| "loss": 0.4053, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 48.32214765100671, | |
| "grad_norm": 3.6237525939941406, | |
| "learning_rate": 0.0001357377049180328, | |
| "loss": 0.3436, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 48.41976815131178, | |
| "grad_norm": 3.1614110469818115, | |
| "learning_rate": 0.0001356065573770492, | |
| "loss": 0.3732, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 48.51738865161684, | |
| "grad_norm": 4.337550640106201, | |
| "learning_rate": 0.00013547540983606556, | |
| "loss": 0.3558, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 48.6150091519219, | |
| "grad_norm": 2.904919385910034, | |
| "learning_rate": 0.00013534426229508197, | |
| "loss": 0.4165, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 48.712629652226966, | |
| "grad_norm": 3.3883869647979736, | |
| "learning_rate": 0.00013521311475409836, | |
| "loss": 0.37, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 48.810250152532035, | |
| "grad_norm": 3.388711452484131, | |
| "learning_rate": 0.00013508196721311478, | |
| "loss": 0.4075, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 48.9078706528371, | |
| "grad_norm": 3.972365617752075, | |
| "learning_rate": 0.00013495081967213114, | |
| "loss": 0.3944, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 49.00549115314216, | |
| "grad_norm": 2.836026668548584, | |
| "learning_rate": 0.00013481967213114756, | |
| "loss": 0.4315, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 49.10311165344722, | |
| "grad_norm": 3.727874755859375, | |
| "learning_rate": 0.00013468852459016395, | |
| "loss": 0.2791, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 49.20073215375229, | |
| "grad_norm": 3.036069869995117, | |
| "learning_rate": 0.00013455737704918034, | |
| "loss": 0.358, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 49.298352654057354, | |
| "grad_norm": 3.044581651687622, | |
| "learning_rate": 0.00013442622950819673, | |
| "loss": 0.423, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 49.395973154362416, | |
| "grad_norm": 3.956517457962036, | |
| "learning_rate": 0.00013429508196721311, | |
| "loss": 0.3988, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 49.49359365466748, | |
| "grad_norm": 4.359031677246094, | |
| "learning_rate": 0.00013416393442622953, | |
| "loss": 0.352, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 49.59121415497255, | |
| "grad_norm": 4.619460582733154, | |
| "learning_rate": 0.0001340327868852459, | |
| "loss": 0.357, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 49.68883465527761, | |
| "grad_norm": 4.051796913146973, | |
| "learning_rate": 0.0001339016393442623, | |
| "loss": 0.4048, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 49.78645515558267, | |
| "grad_norm": 3.5017082691192627, | |
| "learning_rate": 0.0001337704918032787, | |
| "loss": 0.3937, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 49.884075655887735, | |
| "grad_norm": 4.194153308868408, | |
| "learning_rate": 0.0001336393442622951, | |
| "loss": 0.3989, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 49.981696156192804, | |
| "grad_norm": 3.5315494537353516, | |
| "learning_rate": 0.00013350819672131148, | |
| "loss": 0.4032, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 50.079316656497866, | |
| "grad_norm": 3.8597731590270996, | |
| "learning_rate": 0.00013337704918032787, | |
| "loss": 0.3786, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 50.17693715680293, | |
| "grad_norm": 3.1231532096862793, | |
| "learning_rate": 0.00013324590163934428, | |
| "loss": 0.3764, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 50.27455765710799, | |
| "grad_norm": 3.272097110748291, | |
| "learning_rate": 0.00013311475409836067, | |
| "loss": 0.3943, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 50.37217815741306, | |
| "grad_norm": 5.884640216827393, | |
| "learning_rate": 0.00013298360655737706, | |
| "loss": 0.312, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 50.46979865771812, | |
| "grad_norm": 3.424651622772217, | |
| "learning_rate": 0.00013285245901639345, | |
| "loss": 0.3602, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 50.567419158023185, | |
| "grad_norm": 3.703669786453247, | |
| "learning_rate": 0.00013272131147540984, | |
| "loss": 0.3992, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 50.66503965832825, | |
| "grad_norm": 3.4607040882110596, | |
| "learning_rate": 0.00013259016393442623, | |
| "loss": 0.3773, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 50.76266015863331, | |
| "grad_norm": 4.518139362335205, | |
| "learning_rate": 0.00013245901639344262, | |
| "loss": 0.3649, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 50.86028065893838, | |
| "grad_norm": 3.7281651496887207, | |
| "learning_rate": 0.00013232786885245904, | |
| "loss": 0.3797, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 50.95790115924344, | |
| "grad_norm": 3.4530558586120605, | |
| "learning_rate": 0.00013219672131147543, | |
| "loss": 0.3854, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 51.0555216595485, | |
| "grad_norm": 3.220987558364868, | |
| "learning_rate": 0.0001320655737704918, | |
| "loss": 0.3606, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 51.153142159853566, | |
| "grad_norm": 4.0888352394104, | |
| "learning_rate": 0.0001319344262295082, | |
| "loss": 0.3342, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 51.250762660158635, | |
| "grad_norm": 3.6605489253997803, | |
| "learning_rate": 0.0001318032786885246, | |
| "loss": 0.3785, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 51.3483831604637, | |
| "grad_norm": 3.1391944885253906, | |
| "learning_rate": 0.000131672131147541, | |
| "loss": 0.3741, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 51.44600366076876, | |
| "grad_norm": 4.535563945770264, | |
| "learning_rate": 0.00013154098360655737, | |
| "loss": 0.3897, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 51.54362416107382, | |
| "grad_norm": 2.85030198097229, | |
| "learning_rate": 0.0001314098360655738, | |
| "loss": 0.3335, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 51.64124466137889, | |
| "grad_norm": 3.172010660171509, | |
| "learning_rate": 0.00013127868852459018, | |
| "loss": 0.3223, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 51.738865161683954, | |
| "grad_norm": 3.834644079208374, | |
| "learning_rate": 0.00013114754098360654, | |
| "loss": 0.3897, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 51.836485661989016, | |
| "grad_norm": 3.7358360290527344, | |
| "learning_rate": 0.00013101639344262296, | |
| "loss": 0.3853, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 51.93410616229408, | |
| "grad_norm": 4.027935981750488, | |
| "learning_rate": 0.00013088524590163935, | |
| "loss": 0.3775, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 52.03172666259915, | |
| "grad_norm": 2.9343104362487793, | |
| "learning_rate": 0.00013075409836065576, | |
| "loss": 0.3878, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 52.12934716290421, | |
| "grad_norm": 3.9840517044067383, | |
| "learning_rate": 0.00013062295081967213, | |
| "loss": 0.3334, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 52.22696766320927, | |
| "grad_norm": 2.7341692447662354, | |
| "learning_rate": 0.00013049180327868854, | |
| "loss": 0.3643, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 52.324588163514335, | |
| "grad_norm": 4.158634662628174, | |
| "learning_rate": 0.00013036065573770493, | |
| "loss": 0.3497, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 52.422208663819404, | |
| "grad_norm": 3.440031051635742, | |
| "learning_rate": 0.00013022950819672132, | |
| "loss": 0.3917, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 52.51982916412447, | |
| "grad_norm": 3.250298500061035, | |
| "learning_rate": 0.0001300983606557377, | |
| "loss": 0.3472, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 52.61744966442953, | |
| "grad_norm": 3.672611951828003, | |
| "learning_rate": 0.0001299672131147541, | |
| "loss": 0.3315, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 52.71507016473459, | |
| "grad_norm": 2.9064407348632812, | |
| "learning_rate": 0.00012983606557377052, | |
| "loss": 0.4121, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 52.81269066503966, | |
| "grad_norm": 3.6620004177093506, | |
| "learning_rate": 0.00012970491803278688, | |
| "loss": 0.3402, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 52.91031116534472, | |
| "grad_norm": 4.237580299377441, | |
| "learning_rate": 0.0001295737704918033, | |
| "loss": 0.4054, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 53.007931665649785, | |
| "grad_norm": 5.529778957366943, | |
| "learning_rate": 0.00012944262295081968, | |
| "loss": 0.359, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 53.10555216595485, | |
| "grad_norm": 3.2727859020233154, | |
| "learning_rate": 0.00012931147540983607, | |
| "loss": 0.3431, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 53.20317266625992, | |
| "grad_norm": 3.652461290359497, | |
| "learning_rate": 0.00012918032786885246, | |
| "loss": 0.3441, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 53.30079316656498, | |
| "grad_norm": 3.462115526199341, | |
| "learning_rate": 0.00012904918032786885, | |
| "loss": 0.3754, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 53.39841366687004, | |
| "grad_norm": 2.771514654159546, | |
| "learning_rate": 0.00012891803278688527, | |
| "loss": 0.3568, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 53.496034167175104, | |
| "grad_norm": 3.806295394897461, | |
| "learning_rate": 0.00012878688524590166, | |
| "loss": 0.3357, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 53.59365466748017, | |
| "grad_norm": 4.707630634307861, | |
| "learning_rate": 0.00012865573770491802, | |
| "loss": 0.3461, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 53.691275167785236, | |
| "grad_norm": 4.196589946746826, | |
| "learning_rate": 0.00012852459016393444, | |
| "loss": 0.3578, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 53.7888956680903, | |
| "grad_norm": 3.576612949371338, | |
| "learning_rate": 0.00012839344262295083, | |
| "loss": 0.3946, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 53.88651616839536, | |
| "grad_norm": 3.4126014709472656, | |
| "learning_rate": 0.00012826229508196722, | |
| "loss": 0.3441, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 53.98413666870043, | |
| "grad_norm": 3.8211019039154053, | |
| "learning_rate": 0.0001281311475409836, | |
| "loss": 0.3652, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 54.08175716900549, | |
| "grad_norm": 3.447636127471924, | |
| "learning_rate": 0.00012800000000000002, | |
| "loss": 0.3315, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 54.179377669310554, | |
| "grad_norm": 4.556658744812012, | |
| "learning_rate": 0.0001278688524590164, | |
| "loss": 0.37, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 54.27699816961562, | |
| "grad_norm": 3.292505979537964, | |
| "learning_rate": 0.00012773770491803277, | |
| "loss": 0.3386, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 54.374618669920686, | |
| "grad_norm": 2.7513251304626465, | |
| "learning_rate": 0.0001276065573770492, | |
| "loss": 0.3497, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 54.47223917022575, | |
| "grad_norm": 3.0105299949645996, | |
| "learning_rate": 0.00012747540983606558, | |
| "loss": 0.3235, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 54.56985967053081, | |
| "grad_norm": 3.2643628120422363, | |
| "learning_rate": 0.000127344262295082, | |
| "loss": 0.3673, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 54.66748017083587, | |
| "grad_norm": 3.5350589752197266, | |
| "learning_rate": 0.00012721311475409836, | |
| "loss": 0.3733, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 54.76510067114094, | |
| "grad_norm": 5.337443828582764, | |
| "learning_rate": 0.00012708196721311477, | |
| "loss": 0.3376, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 54.862721171446005, | |
| "grad_norm": 3.618621826171875, | |
| "learning_rate": 0.00012695081967213116, | |
| "loss": 0.3372, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 54.96034167175107, | |
| "grad_norm": 3.5537171363830566, | |
| "learning_rate": 0.00012681967213114753, | |
| "loss": 0.3639, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 55.05796217205613, | |
| "grad_norm": 3.529486656188965, | |
| "learning_rate": 0.00012668852459016394, | |
| "loss": 0.3512, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 55.1555826723612, | |
| "grad_norm": 4.277002811431885, | |
| "learning_rate": 0.00012655737704918033, | |
| "loss": 0.3841, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 55.25320317266626, | |
| "grad_norm": 3.5343832969665527, | |
| "learning_rate": 0.00012642622950819675, | |
| "loss": 0.3757, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 55.35082367297132, | |
| "grad_norm": 3.350728988647461, | |
| "learning_rate": 0.0001262950819672131, | |
| "loss": 0.3179, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 55.448444173276386, | |
| "grad_norm": 4.037693500518799, | |
| "learning_rate": 0.0001261639344262295, | |
| "loss": 0.3685, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 55.546064673581455, | |
| "grad_norm": 4.390751838684082, | |
| "learning_rate": 0.00012603278688524592, | |
| "loss": 0.3161, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 55.64368517388652, | |
| "grad_norm": 4.671621322631836, | |
| "learning_rate": 0.0001259016393442623, | |
| "loss": 0.3268, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 55.74130567419158, | |
| "grad_norm": 4.637950897216797, | |
| "learning_rate": 0.0001257704918032787, | |
| "loss": 0.357, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 55.83892617449664, | |
| "grad_norm": 5.1185455322265625, | |
| "learning_rate": 0.00012563934426229508, | |
| "loss": 0.3628, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 55.93654667480171, | |
| "grad_norm": 5.654429912567139, | |
| "learning_rate": 0.0001255081967213115, | |
| "loss": 0.3519, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 56.034167175106774, | |
| "grad_norm": 3.041135549545288, | |
| "learning_rate": 0.00012537704918032786, | |
| "loss": 0.3529, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 56.131787675411836, | |
| "grad_norm": 4.020240783691406, | |
| "learning_rate": 0.00012524590163934425, | |
| "loss": 0.3597, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 56.2294081757169, | |
| "grad_norm": 3.730290412902832, | |
| "learning_rate": 0.00012511475409836067, | |
| "loss": 0.3137, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 56.32702867602197, | |
| "grad_norm": 3.314856767654419, | |
| "learning_rate": 0.00012498360655737706, | |
| "loss": 0.3122, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 56.42464917632703, | |
| "grad_norm": 4.214298248291016, | |
| "learning_rate": 0.00012485245901639345, | |
| "loss": 0.343, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 56.52226967663209, | |
| "grad_norm": 4.5516791343688965, | |
| "learning_rate": 0.00012472131147540984, | |
| "loss": 0.3714, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 56.619890176937155, | |
| "grad_norm": 5.098419666290283, | |
| "learning_rate": 0.00012459016393442625, | |
| "loss": 0.3489, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 56.717510677242224, | |
| "grad_norm": 2.3853330612182617, | |
| "learning_rate": 0.00012445901639344262, | |
| "loss": 0.3389, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 56.815131177547286, | |
| "grad_norm": 4.595917224884033, | |
| "learning_rate": 0.000124327868852459, | |
| "loss": 0.3392, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 56.91275167785235, | |
| "grad_norm": 4.49993896484375, | |
| "learning_rate": 0.00012419672131147542, | |
| "loss": 0.3419, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 57.01037217815741, | |
| "grad_norm": 3.2568199634552, | |
| "learning_rate": 0.0001240655737704918, | |
| "loss": 0.3614, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 57.10799267846248, | |
| "grad_norm": 4.653031826019287, | |
| "learning_rate": 0.0001239344262295082, | |
| "loss": 0.3096, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 57.20561317876754, | |
| "grad_norm": 2.5655269622802734, | |
| "learning_rate": 0.0001238032786885246, | |
| "loss": 0.3306, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 57.303233679072605, | |
| "grad_norm": 3.4758191108703613, | |
| "learning_rate": 0.000123672131147541, | |
| "loss": 0.3513, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 57.40085417937767, | |
| "grad_norm": 3.107544183731079, | |
| "learning_rate": 0.0001235409836065574, | |
| "loss": 0.3231, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 57.49847467968274, | |
| "grad_norm": 3.983060121536255, | |
| "learning_rate": 0.00012340983606557376, | |
| "loss": 0.3366, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 57.5960951799878, | |
| "grad_norm": 3.987680673599243, | |
| "learning_rate": 0.00012327868852459017, | |
| "loss": 0.3365, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 57.69371568029286, | |
| "grad_norm": 3.7637107372283936, | |
| "learning_rate": 0.00012314754098360656, | |
| "loss": 0.3432, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 57.79133618059792, | |
| "grad_norm": 3.193894386291504, | |
| "learning_rate": 0.00012301639344262295, | |
| "loss": 0.3668, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 57.88895668090299, | |
| "grad_norm": 3.078507423400879, | |
| "learning_rate": 0.00012288524590163934, | |
| "loss": 0.3922, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 57.986577181208055, | |
| "grad_norm": 3.561068058013916, | |
| "learning_rate": 0.00012275409836065573, | |
| "loss": 0.3285, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 58.08419768151312, | |
| "grad_norm": 2.9965078830718994, | |
| "learning_rate": 0.00012262295081967215, | |
| "loss": 0.2991, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 58.18181818181818, | |
| "grad_norm": 3.8305752277374268, | |
| "learning_rate": 0.0001224918032786885, | |
| "loss": 0.3728, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 58.27943868212325, | |
| "grad_norm": 3.708678960800171, | |
| "learning_rate": 0.00012236065573770493, | |
| "loss": 0.3078, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 58.37705918242831, | |
| "grad_norm": 3.8988072872161865, | |
| "learning_rate": 0.00012222950819672132, | |
| "loss": 0.3389, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 58.474679682733374, | |
| "grad_norm": 3.550420045852661, | |
| "learning_rate": 0.00012209836065573773, | |
| "loss": 0.4069, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 58.572300183038436, | |
| "grad_norm": 3.799747943878174, | |
| "learning_rate": 0.0001219672131147541, | |
| "loss": 0.3058, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 58.669920683343506, | |
| "grad_norm": 4.734778881072998, | |
| "learning_rate": 0.0001218360655737705, | |
| "loss": 0.3199, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 58.76754118364857, | |
| "grad_norm": 3.449169158935547, | |
| "learning_rate": 0.0001217049180327869, | |
| "loss": 0.3309, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 58.86516168395363, | |
| "grad_norm": 3.164651393890381, | |
| "learning_rate": 0.00012157377049180328, | |
| "loss": 0.3338, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 58.96278218425869, | |
| "grad_norm": 3.976644515991211, | |
| "learning_rate": 0.00012144262295081968, | |
| "loss": 0.3207, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 59.060402684563755, | |
| "grad_norm": 4.6017632484436035, | |
| "learning_rate": 0.00012131147540983607, | |
| "loss": 0.317, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 59.158023184868824, | |
| "grad_norm": 6.806497573852539, | |
| "learning_rate": 0.00012118032786885247, | |
| "loss": 0.2943, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 59.25564368517389, | |
| "grad_norm": 3.545241355895996, | |
| "learning_rate": 0.00012104918032786885, | |
| "loss": 0.3093, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 59.35326418547895, | |
| "grad_norm": 2.435683488845825, | |
| "learning_rate": 0.00012091803278688525, | |
| "loss": 0.3235, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 59.45088468578401, | |
| "grad_norm": 3.507638931274414, | |
| "learning_rate": 0.00012078688524590165, | |
| "loss": 0.3405, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 59.54850518608908, | |
| "grad_norm": 3.9438552856445312, | |
| "learning_rate": 0.00012065573770491804, | |
| "loss": 0.3311, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 59.64612568639414, | |
| "grad_norm": 5.4473652839660645, | |
| "learning_rate": 0.00012052459016393443, | |
| "loss": 0.3424, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 59.743746186699205, | |
| "grad_norm": 3.204779624938965, | |
| "learning_rate": 0.00012039344262295082, | |
| "loss": 0.3297, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 59.84136668700427, | |
| "grad_norm": 3.356764793395996, | |
| "learning_rate": 0.00012026229508196722, | |
| "loss": 0.3698, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 59.93898718730934, | |
| "grad_norm": 3.6063504219055176, | |
| "learning_rate": 0.0001201311475409836, | |
| "loss": 0.3479, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 60.0366076876144, | |
| "grad_norm": 3.49246883392334, | |
| "learning_rate": 0.00012, | |
| "loss": 0.3394, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 60.13422818791946, | |
| "grad_norm": 4.206517219543457, | |
| "learning_rate": 0.0001198688524590164, | |
| "loss": 0.2825, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 60.231848688224524, | |
| "grad_norm": 3.532618999481201, | |
| "learning_rate": 0.0001197377049180328, | |
| "loss": 0.3162, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 60.32946918852959, | |
| "grad_norm": 3.6355433464050293, | |
| "learning_rate": 0.00011960655737704917, | |
| "loss": 0.3138, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 60.427089688834656, | |
| "grad_norm": 3.1386282444000244, | |
| "learning_rate": 0.00011947540983606557, | |
| "loss": 0.335, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 60.52471018913972, | |
| "grad_norm": 2.946789026260376, | |
| "learning_rate": 0.00011934426229508198, | |
| "loss": 0.321, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 60.62233068944478, | |
| "grad_norm": 4.056975841522217, | |
| "learning_rate": 0.00011921311475409838, | |
| "loss": 0.3647, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 60.71995118974985, | |
| "grad_norm": 3.8449249267578125, | |
| "learning_rate": 0.00011908196721311476, | |
| "loss": 0.3189, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 60.81757169005491, | |
| "grad_norm": 3.464470148086548, | |
| "learning_rate": 0.00011895081967213116, | |
| "loss": 0.3711, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 60.915192190359974, | |
| "grad_norm": 3.6908376216888428, | |
| "learning_rate": 0.00011881967213114755, | |
| "loss": 0.3303, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 61.01281269066504, | |
| "grad_norm": 4.723835468292236, | |
| "learning_rate": 0.00011868852459016392, | |
| "loss": 0.305, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 61.110433190970106, | |
| "grad_norm": 3.6280064582824707, | |
| "learning_rate": 0.00011855737704918033, | |
| "loss": 0.3103, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 61.20805369127517, | |
| "grad_norm": 3.640411138534546, | |
| "learning_rate": 0.00011842622950819673, | |
| "loss": 0.3017, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 61.30567419158023, | |
| "grad_norm": 4.304122447967529, | |
| "learning_rate": 0.00011829508196721313, | |
| "loss": 0.3452, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 61.40329469188529, | |
| "grad_norm": 3.4708502292633057, | |
| "learning_rate": 0.00011816393442622951, | |
| "loss": 0.3195, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 61.50091519219036, | |
| "grad_norm": 3.257230758666992, | |
| "learning_rate": 0.00011803278688524591, | |
| "loss": 0.3115, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 61.598535692495425, | |
| "grad_norm": 3.0247697830200195, | |
| "learning_rate": 0.0001179016393442623, | |
| "loss": 0.314, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 61.69615619280049, | |
| "grad_norm": 2.5768277645111084, | |
| "learning_rate": 0.0001177704918032787, | |
| "loss": 0.3195, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 61.79377669310555, | |
| "grad_norm": 4.08043909072876, | |
| "learning_rate": 0.00011763934426229508, | |
| "loss": 0.3312, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 61.89139719341062, | |
| "grad_norm": 5.277688503265381, | |
| "learning_rate": 0.00011750819672131148, | |
| "loss": 0.3607, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 61.98901769371568, | |
| "grad_norm": 4.2203288078308105, | |
| "learning_rate": 0.00011737704918032789, | |
| "loss": 0.3233, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 62.08663819402074, | |
| "grad_norm": 3.8487565517425537, | |
| "learning_rate": 0.00011724590163934426, | |
| "loss": 0.2777, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 62.184258694325806, | |
| "grad_norm": 3.509904623031616, | |
| "learning_rate": 0.00011711475409836066, | |
| "loss": 0.3418, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 62.281879194630875, | |
| "grad_norm": 3.7442731857299805, | |
| "learning_rate": 0.00011698360655737705, | |
| "loss": 0.3115, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 62.37949969493594, | |
| "grad_norm": 3.3927197456359863, | |
| "learning_rate": 0.00011685245901639346, | |
| "loss": 0.3304, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 62.477120195241, | |
| "grad_norm": 4.529331684112549, | |
| "learning_rate": 0.00011672131147540983, | |
| "loss": 0.3246, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 62.57474069554606, | |
| "grad_norm": 2.9640207290649414, | |
| "learning_rate": 0.00011659016393442623, | |
| "loss": 0.3228, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 62.67236119585113, | |
| "grad_norm": 4.769125938415527, | |
| "learning_rate": 0.00011645901639344264, | |
| "loss": 0.3552, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 62.769981696156194, | |
| "grad_norm": 4.817038536071777, | |
| "learning_rate": 0.00011632786885245903, | |
| "loss": 0.269, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 62.867602196461256, | |
| "grad_norm": 4.221793174743652, | |
| "learning_rate": 0.0001161967213114754, | |
| "loss": 0.3192, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 62.96522269676632, | |
| "grad_norm": 3.8652241230010986, | |
| "learning_rate": 0.0001160655737704918, | |
| "loss": 0.3415, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 63.06284319707139, | |
| "grad_norm": 3.7543272972106934, | |
| "learning_rate": 0.00011593442622950821, | |
| "loss": 0.2707, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 63.16046369737645, | |
| "grad_norm": 3.2344672679901123, | |
| "learning_rate": 0.00011580327868852458, | |
| "loss": 0.3133, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 63.25808419768151, | |
| "grad_norm": 2.9289095401763916, | |
| "learning_rate": 0.00011567213114754099, | |
| "loss": 0.3111, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 63.355704697986575, | |
| "grad_norm": 4.034417629241943, | |
| "learning_rate": 0.00011554098360655739, | |
| "loss": 0.3443, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 63.453325198291644, | |
| "grad_norm": 3.571948528289795, | |
| "learning_rate": 0.00011540983606557378, | |
| "loss": 0.3061, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 63.550945698596706, | |
| "grad_norm": 3.8213961124420166, | |
| "learning_rate": 0.00011527868852459016, | |
| "loss": 0.2943, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 63.64856619890177, | |
| "grad_norm": 4.343628883361816, | |
| "learning_rate": 0.00011514754098360656, | |
| "loss": 0.3499, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 63.74618669920683, | |
| "grad_norm": 3.373922348022461, | |
| "learning_rate": 0.00011501639344262296, | |
| "loss": 0.3193, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 63.8438071995119, | |
| "grad_norm": 4.19200325012207, | |
| "learning_rate": 0.00011488524590163936, | |
| "loss": 0.3221, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 63.94142769981696, | |
| "grad_norm": 3.28054141998291, | |
| "learning_rate": 0.00011475409836065574, | |
| "loss": 0.328, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 64.03904820012202, | |
| "grad_norm": 3.887531042098999, | |
| "learning_rate": 0.00011462295081967214, | |
| "loss": 0.3161, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 64.1366687004271, | |
| "grad_norm": 3.633239507675171, | |
| "learning_rate": 0.00011449180327868853, | |
| "loss": 0.3001, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 64.23428920073215, | |
| "grad_norm": 3.5353002548217773, | |
| "learning_rate": 0.00011436065573770491, | |
| "loss": 0.3085, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 64.33190970103722, | |
| "grad_norm": 3.3285086154937744, | |
| "learning_rate": 0.00011422950819672131, | |
| "loss": 0.2807, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 64.42953020134229, | |
| "grad_norm": 4.186200141906738, | |
| "learning_rate": 0.00011409836065573771, | |
| "loss": 0.3072, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 64.52715070164734, | |
| "grad_norm": 4.000847339630127, | |
| "learning_rate": 0.00011396721311475412, | |
| "loss": 0.2794, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 64.62477120195241, | |
| "grad_norm": 3.643327474594116, | |
| "learning_rate": 0.00011383606557377049, | |
| "loss": 0.3292, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 64.72239170225747, | |
| "grad_norm": 4.381191730499268, | |
| "learning_rate": 0.00011370491803278688, | |
| "loss": 0.3311, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 64.82001220256254, | |
| "grad_norm": 3.9231462478637695, | |
| "learning_rate": 0.00011357377049180329, | |
| "loss": 0.3239, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 64.9176327028676, | |
| "grad_norm": 3.68272066116333, | |
| "learning_rate": 0.00011344262295081969, | |
| "loss": 0.3573, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 65.01525320317266, | |
| "grad_norm": 3.5405211448669434, | |
| "learning_rate": 0.00011331147540983606, | |
| "loss": 0.3026, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 65.11287370347773, | |
| "grad_norm": 4.24603796005249, | |
| "learning_rate": 0.00011318032786885247, | |
| "loss": 0.2984, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 65.2104942037828, | |
| "grad_norm": 3.9128637313842773, | |
| "learning_rate": 0.00011304918032786887, | |
| "loss": 0.3203, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 65.30811470408786, | |
| "grad_norm": 3.794922113418579, | |
| "learning_rate": 0.00011291803278688525, | |
| "loss": 0.2696, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 65.40573520439293, | |
| "grad_norm": 7.95392370223999, | |
| "learning_rate": 0.00011278688524590164, | |
| "loss": 0.3349, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 65.50335570469798, | |
| "grad_norm": 4.138427257537842, | |
| "learning_rate": 0.00011265573770491804, | |
| "loss": 0.303, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 65.60097620500305, | |
| "grad_norm": 3.470979690551758, | |
| "learning_rate": 0.00011252459016393444, | |
| "loss": 0.3019, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 65.69859670530812, | |
| "grad_norm": 3.5186235904693604, | |
| "learning_rate": 0.00011239344262295082, | |
| "loss": 0.3254, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 65.79621720561317, | |
| "grad_norm": 5.092533111572266, | |
| "learning_rate": 0.00011226229508196722, | |
| "loss": 0.3167, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 65.89383770591824, | |
| "grad_norm": 4.472383499145508, | |
| "learning_rate": 0.00011213114754098362, | |
| "loss": 0.3102, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 65.99145820622331, | |
| "grad_norm": 4.198476314544678, | |
| "learning_rate": 0.00011200000000000001, | |
| "loss": 0.3085, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 66.08907870652837, | |
| "grad_norm": 4.235730171203613, | |
| "learning_rate": 0.00011186885245901639, | |
| "loss": 0.2754, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 66.18669920683344, | |
| "grad_norm": 3.673928737640381, | |
| "learning_rate": 0.00011173770491803279, | |
| "loss": 0.2883, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 66.2843197071385, | |
| "grad_norm": 3.8741283416748047, | |
| "learning_rate": 0.0001116065573770492, | |
| "loss": 0.3055, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 66.38194020744356, | |
| "grad_norm": 3.2982194423675537, | |
| "learning_rate": 0.00011147540983606557, | |
| "loss": 0.3309, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 66.47956070774863, | |
| "grad_norm": 3.1477532386779785, | |
| "learning_rate": 0.00011134426229508197, | |
| "loss": 0.299, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 66.57718120805369, | |
| "grad_norm": 3.723947525024414, | |
| "learning_rate": 0.00011121311475409838, | |
| "loss": 0.3016, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 66.67480170835876, | |
| "grad_norm": 4.094952583312988, | |
| "learning_rate": 0.00011108196721311476, | |
| "loss": 0.3045, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 66.77242220866383, | |
| "grad_norm": 3.607595205307007, | |
| "learning_rate": 0.00011095081967213114, | |
| "loss": 0.2983, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 66.87004270896888, | |
| "grad_norm": 4.301547527313232, | |
| "learning_rate": 0.00011081967213114754, | |
| "loss": 0.2783, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 66.96766320927395, | |
| "grad_norm": 3.570128917694092, | |
| "learning_rate": 0.00011068852459016395, | |
| "loss": 0.3547, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 67.065283709579, | |
| "grad_norm": 4.2378621101379395, | |
| "learning_rate": 0.00011055737704918035, | |
| "loss": 0.3097, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 67.16290420988408, | |
| "grad_norm": 3.6283559799194336, | |
| "learning_rate": 0.00011042622950819672, | |
| "loss": 0.2773, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 67.26052471018915, | |
| "grad_norm": 3.317201614379883, | |
| "learning_rate": 0.00011029508196721311, | |
| "loss": 0.3179, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 67.3581452104942, | |
| "grad_norm": 3.120859146118164, | |
| "learning_rate": 0.00011016393442622952, | |
| "loss": 0.3239, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 67.45576571079927, | |
| "grad_norm": 3.163276433944702, | |
| "learning_rate": 0.00011003278688524589, | |
| "loss": 0.2875, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 67.55338621110434, | |
| "grad_norm": 3.597426652908325, | |
| "learning_rate": 0.0001099016393442623, | |
| "loss": 0.2739, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 67.6510067114094, | |
| "grad_norm": 3.572763204574585, | |
| "learning_rate": 0.0001097704918032787, | |
| "loss": 0.3009, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 67.74862721171446, | |
| "grad_norm": 3.660034656524658, | |
| "learning_rate": 0.0001096393442622951, | |
| "loss": 0.3089, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 67.84624771201952, | |
| "grad_norm": 4.177897930145264, | |
| "learning_rate": 0.00010950819672131148, | |
| "loss": 0.3205, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 67.94386821232459, | |
| "grad_norm": 5.586677074432373, | |
| "learning_rate": 0.00010937704918032787, | |
| "loss": 0.3263, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 68.04148871262966, | |
| "grad_norm": 2.8578503131866455, | |
| "learning_rate": 0.00010924590163934427, | |
| "loss": 0.3139, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 68.13910921293471, | |
| "grad_norm": 3.129765748977661, | |
| "learning_rate": 0.00010911475409836067, | |
| "loss": 0.2729, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 68.23672971323978, | |
| "grad_norm": 3.238567590713501, | |
| "learning_rate": 0.00010898360655737705, | |
| "loss": 0.3008, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 68.33435021354484, | |
| "grad_norm": 3.5786325931549072, | |
| "learning_rate": 0.00010885245901639345, | |
| "loss": 0.2818, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 68.4319707138499, | |
| "grad_norm": 3.5092949867248535, | |
| "learning_rate": 0.00010872131147540985, | |
| "loss": 0.3034, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 68.52959121415498, | |
| "grad_norm": 4.172147274017334, | |
| "learning_rate": 0.00010859016393442623, | |
| "loss": 0.2917, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 68.62721171446003, | |
| "grad_norm": 3.5723907947540283, | |
| "learning_rate": 0.00010845901639344262, | |
| "loss": 0.3119, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 68.7248322147651, | |
| "grad_norm": 3.2721340656280518, | |
| "learning_rate": 0.00010832786885245902, | |
| "loss": 0.2698, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 68.82245271507017, | |
| "grad_norm": 4.181795597076416, | |
| "learning_rate": 0.00010819672131147543, | |
| "loss": 0.3181, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 68.92007321537523, | |
| "grad_norm": 4.145654678344727, | |
| "learning_rate": 0.0001080655737704918, | |
| "loss": 0.2837, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 69.0176937156803, | |
| "grad_norm": 4.098430633544922, | |
| "learning_rate": 0.0001079344262295082, | |
| "loss": 0.3281, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 69.11531421598535, | |
| "grad_norm": 3.6833229064941406, | |
| "learning_rate": 0.00010780327868852461, | |
| "loss": 0.3059, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 69.21293471629042, | |
| "grad_norm": 3.802940845489502, | |
| "learning_rate": 0.00010767213114754098, | |
| "loss": 0.2648, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 69.31055521659549, | |
| "grad_norm": 2.9509127140045166, | |
| "learning_rate": 0.00010754098360655737, | |
| "loss": 0.2828, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 69.40817571690054, | |
| "grad_norm": 3.6474950313568115, | |
| "learning_rate": 0.00010740983606557378, | |
| "loss": 0.2848, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 69.50579621720561, | |
| "grad_norm": 3.762017250061035, | |
| "learning_rate": 0.00010727868852459018, | |
| "loss": 0.3305, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 69.60341671751068, | |
| "grad_norm": 3.3296637535095215, | |
| "learning_rate": 0.00010714754098360655, | |
| "loss": 0.2885, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 69.70103721781574, | |
| "grad_norm": 4.1584296226501465, | |
| "learning_rate": 0.00010701639344262296, | |
| "loss": 0.3162, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 69.79865771812081, | |
| "grad_norm": 3.365910768508911, | |
| "learning_rate": 0.00010688524590163935, | |
| "loss": 0.2855, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 69.89627821842586, | |
| "grad_norm": 3.9548261165618896, | |
| "learning_rate": 0.00010675409836065575, | |
| "loss": 0.3001, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 69.99389871873093, | |
| "grad_norm": 4.556044578552246, | |
| "learning_rate": 0.00010662295081967212, | |
| "loss": 0.2855, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 70.091519219036, | |
| "grad_norm": 4.781431198120117, | |
| "learning_rate": 0.00010649180327868853, | |
| "loss": 0.2741, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 70.18913971934106, | |
| "grad_norm": 2.645362377166748, | |
| "learning_rate": 0.00010636065573770493, | |
| "loss": 0.2885, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 70.28676021964613, | |
| "grad_norm": 4.382370471954346, | |
| "learning_rate": 0.0001062295081967213, | |
| "loss": 0.3038, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 70.3843807199512, | |
| "grad_norm": 2.908954381942749, | |
| "learning_rate": 0.00010609836065573771, | |
| "loss": 0.2765, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 70.48200122025625, | |
| "grad_norm": 3.298417091369629, | |
| "learning_rate": 0.0001059672131147541, | |
| "loss": 0.2986, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 70.57962172056132, | |
| "grad_norm": 3.324769973754883, | |
| "learning_rate": 0.0001058360655737705, | |
| "loss": 0.2712, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 70.67724222086638, | |
| "grad_norm": 4.584010124206543, | |
| "learning_rate": 0.00010570491803278688, | |
| "loss": 0.3049, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 70.77486272117144, | |
| "grad_norm": 3.648829221725464, | |
| "learning_rate": 0.00010557377049180328, | |
| "loss": 0.3169, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 70.87248322147651, | |
| "grad_norm": 3.729729175567627, | |
| "learning_rate": 0.00010544262295081968, | |
| "loss": 0.2818, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 70.97010372178157, | |
| "grad_norm": 3.572127103805542, | |
| "learning_rate": 0.00010531147540983609, | |
| "loss": 0.303, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 71.06772422208664, | |
| "grad_norm": 3.4848947525024414, | |
| "learning_rate": 0.00010518032786885246, | |
| "loss": 0.2796, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 71.16534472239171, | |
| "grad_norm": 3.478776454925537, | |
| "learning_rate": 0.00010504918032786885, | |
| "loss": 0.2718, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 71.26296522269676, | |
| "grad_norm": 3.6878888607025146, | |
| "learning_rate": 0.00010491803278688525, | |
| "loss": 0.2611, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 71.36058572300183, | |
| "grad_norm": 3.0455853939056396, | |
| "learning_rate": 0.00010478688524590163, | |
| "loss": 0.3063, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 71.45820622330689, | |
| "grad_norm": 4.242178916931152, | |
| "learning_rate": 0.00010465573770491803, | |
| "loss": 0.2811, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 71.55582672361196, | |
| "grad_norm": 2.9485058784484863, | |
| "learning_rate": 0.00010452459016393444, | |
| "loss": 0.278, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 71.65344722391703, | |
| "grad_norm": 4.489981651306152, | |
| "learning_rate": 0.00010439344262295083, | |
| "loss": 0.2833, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 71.75106772422208, | |
| "grad_norm": 3.748349666595459, | |
| "learning_rate": 0.00010426229508196721, | |
| "loss": 0.3362, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 71.84868822452715, | |
| "grad_norm": 3.5393285751342773, | |
| "learning_rate": 0.0001041311475409836, | |
| "loss": 0.2803, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 71.94630872483222, | |
| "grad_norm": 3.8631341457366943, | |
| "learning_rate": 0.00010400000000000001, | |
| "loss": 0.3103, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 72.04392922513728, | |
| "grad_norm": 3.3096232414245605, | |
| "learning_rate": 0.00010386885245901641, | |
| "loss": 0.3059, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 72.14154972544235, | |
| "grad_norm": 3.3514437675476074, | |
| "learning_rate": 0.00010373770491803279, | |
| "loss": 0.2832, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 72.2391702257474, | |
| "grad_norm": 3.548172950744629, | |
| "learning_rate": 0.00010360655737704919, | |
| "loss": 0.3087, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 72.33679072605247, | |
| "grad_norm": 2.9847452640533447, | |
| "learning_rate": 0.00010347540983606558, | |
| "loss": 0.2565, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 72.43441122635754, | |
| "grad_norm": 2.827442169189453, | |
| "learning_rate": 0.00010334426229508197, | |
| "loss": 0.2622, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 72.5320317266626, | |
| "grad_norm": 5.385785102844238, | |
| "learning_rate": 0.00010321311475409836, | |
| "loss": 0.2684, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 72.62965222696766, | |
| "grad_norm": 2.8758130073547363, | |
| "learning_rate": 0.00010308196721311476, | |
| "loss": 0.3032, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 72.72727272727273, | |
| "grad_norm": 3.0120010375976562, | |
| "learning_rate": 0.00010295081967213116, | |
| "loss": 0.2822, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 72.82489322757779, | |
| "grad_norm": 4.078627109527588, | |
| "learning_rate": 0.00010281967213114754, | |
| "loss": 0.2852, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 72.92251372788286, | |
| "grad_norm": 3.6309173107147217, | |
| "learning_rate": 0.00010268852459016394, | |
| "loss": 0.2961, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 73.02013422818791, | |
| "grad_norm": 5.154388427734375, | |
| "learning_rate": 0.00010255737704918033, | |
| "loss": 0.2926, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 73.11775472849298, | |
| "grad_norm": 3.6091132164001465, | |
| "learning_rate": 0.00010242622950819673, | |
| "loss": 0.2855, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 73.21537522879805, | |
| "grad_norm": 3.872344970703125, | |
| "learning_rate": 0.00010229508196721311, | |
| "loss": 0.2728, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 73.31299572910311, | |
| "grad_norm": 3.2357425689697266, | |
| "learning_rate": 0.00010216393442622951, | |
| "loss": 0.3022, | |
| "step": 15020 | |
| }, | |
| { | |
| "epoch": 73.41061622940818, | |
| "grad_norm": 3.10544490814209, | |
| "learning_rate": 0.00010203278688524592, | |
| "loss": 0.2792, | |
| "step": 15040 | |
| }, | |
| { | |
| "epoch": 73.50823672971325, | |
| "grad_norm": 3.796975612640381, | |
| "learning_rate": 0.00010190163934426229, | |
| "loss": 0.2975, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 73.6058572300183, | |
| "grad_norm": 4.492702484130859, | |
| "learning_rate": 0.0001017704918032787, | |
| "loss": 0.2628, | |
| "step": 15080 | |
| }, | |
| { | |
| "epoch": 73.70347773032337, | |
| "grad_norm": 3.9376978874206543, | |
| "learning_rate": 0.00010163934426229508, | |
| "loss": 0.2428, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 73.80109823062843, | |
| "grad_norm": 4.3148980140686035, | |
| "learning_rate": 0.00010150819672131149, | |
| "loss": 0.3151, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 73.8987187309335, | |
| "grad_norm": 3.7548837661743164, | |
| "learning_rate": 0.00010137704918032786, | |
| "loss": 0.2676, | |
| "step": 15140 | |
| }, | |
| { | |
| "epoch": 73.99633923123857, | |
| "grad_norm": 3.9053032398223877, | |
| "learning_rate": 0.00010124590163934427, | |
| "loss": 0.298, | |
| "step": 15160 | |
| }, | |
| { | |
| "epoch": 74.09395973154362, | |
| "grad_norm": 4.0978007316589355, | |
| "learning_rate": 0.00010111475409836067, | |
| "loss": 0.2597, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 74.19158023184869, | |
| "grad_norm": 2.8652732372283936, | |
| "learning_rate": 0.00010098360655737706, | |
| "loss": 0.2842, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 74.28920073215376, | |
| "grad_norm": 3.180609941482544, | |
| "learning_rate": 0.00010085245901639345, | |
| "loss": 0.2737, | |
| "step": 15220 | |
| }, | |
| { | |
| "epoch": 74.38682123245881, | |
| "grad_norm": 4.571623802185059, | |
| "learning_rate": 0.00010072131147540984, | |
| "loss": 0.3213, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 74.48444173276388, | |
| "grad_norm": 3.073421001434326, | |
| "learning_rate": 0.00010059016393442624, | |
| "loss": 0.2922, | |
| "step": 15260 | |
| }, | |
| { | |
| "epoch": 74.58206223306894, | |
| "grad_norm": 5.4209980964660645, | |
| "learning_rate": 0.00010045901639344261, | |
| "loss": 0.2575, | |
| "step": 15280 | |
| }, | |
| { | |
| "epoch": 74.67968273337401, | |
| "grad_norm": 4.5742621421813965, | |
| "learning_rate": 0.00010032786885245902, | |
| "loss": 0.3044, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 74.77730323367908, | |
| "grad_norm": 3.712390422821045, | |
| "learning_rate": 0.00010019672131147542, | |
| "loss": 0.2891, | |
| "step": 15320 | |
| }, | |
| { | |
| "epoch": 74.87492373398413, | |
| "grad_norm": 3.149919033050537, | |
| "learning_rate": 0.00010006557377049181, | |
| "loss": 0.275, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 74.9725442342892, | |
| "grad_norm": 4.481144428253174, | |
| "learning_rate": 9.99344262295082e-05, | |
| "loss": 0.2544, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 75.07016473459427, | |
| "grad_norm": 3.6176226139068604, | |
| "learning_rate": 9.980327868852459e-05, | |
| "loss": 0.2656, | |
| "step": 15380 | |
| }, | |
| { | |
| "epoch": 75.16778523489933, | |
| "grad_norm": 3.986781120300293, | |
| "learning_rate": 9.967213114754099e-05, | |
| "loss": 0.2712, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 75.2654057352044, | |
| "grad_norm": 2.7806153297424316, | |
| "learning_rate": 9.954098360655738e-05, | |
| "loss": 0.2542, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 75.36302623550945, | |
| "grad_norm": 4.493511199951172, | |
| "learning_rate": 9.940983606557378e-05, | |
| "loss": 0.2871, | |
| "step": 15440 | |
| }, | |
| { | |
| "epoch": 75.46064673581452, | |
| "grad_norm": 4.610682964324951, | |
| "learning_rate": 9.927868852459017e-05, | |
| "loss": 0.2699, | |
| "step": 15460 | |
| }, | |
| { | |
| "epoch": 75.55826723611959, | |
| "grad_norm": 3.7209839820861816, | |
| "learning_rate": 9.914754098360656e-05, | |
| "loss": 0.3086, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 75.65588773642465, | |
| "grad_norm": 4.1248931884765625, | |
| "learning_rate": 9.901639344262295e-05, | |
| "loss": 0.2744, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 75.75350823672972, | |
| "grad_norm": 3.7576661109924316, | |
| "learning_rate": 9.888524590163934e-05, | |
| "loss": 0.2791, | |
| "step": 15520 | |
| }, | |
| { | |
| "epoch": 75.85112873703477, | |
| "grad_norm": 3.6697514057159424, | |
| "learning_rate": 9.875409836065574e-05, | |
| "loss": 0.2779, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 75.94874923733984, | |
| "grad_norm": 4.156905651092529, | |
| "learning_rate": 9.862295081967213e-05, | |
| "loss": 0.2806, | |
| "step": 15560 | |
| }, | |
| { | |
| "epoch": 76.04636973764491, | |
| "grad_norm": 3.244154214859009, | |
| "learning_rate": 9.849180327868854e-05, | |
| "loss": 0.2772, | |
| "step": 15580 | |
| }, | |
| { | |
| "epoch": 76.14399023794996, | |
| "grad_norm": 4.677572250366211, | |
| "learning_rate": 9.836065573770493e-05, | |
| "loss": 0.2695, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 76.24161073825503, | |
| "grad_norm": 3.442591667175293, | |
| "learning_rate": 9.822950819672132e-05, | |
| "loss": 0.309, | |
| "step": 15620 | |
| }, | |
| { | |
| "epoch": 76.3392312385601, | |
| "grad_norm": 2.930734395980835, | |
| "learning_rate": 9.80983606557377e-05, | |
| "loss": 0.2543, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 76.43685173886516, | |
| "grad_norm": 3.7919161319732666, | |
| "learning_rate": 9.796721311475411e-05, | |
| "loss": 0.2802, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 76.53447223917023, | |
| "grad_norm": 3.598411798477173, | |
| "learning_rate": 9.78360655737705e-05, | |
| "loss": 0.2433, | |
| "step": 15680 | |
| }, | |
| { | |
| "epoch": 76.63209273947528, | |
| "grad_norm": 3.6771163940429688, | |
| "learning_rate": 9.770491803278689e-05, | |
| "loss": 0.2529, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 76.72971323978035, | |
| "grad_norm": 3.1646664142608643, | |
| "learning_rate": 9.757377049180329e-05, | |
| "loss": 0.2801, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 76.82733374008542, | |
| "grad_norm": 2.325819969177246, | |
| "learning_rate": 9.744262295081968e-05, | |
| "loss": 0.2609, | |
| "step": 15740 | |
| }, | |
| { | |
| "epoch": 76.92495424039048, | |
| "grad_norm": 4.598663330078125, | |
| "learning_rate": 9.731147540983607e-05, | |
| "loss": 0.2936, | |
| "step": 15760 | |
| }, | |
| { | |
| "epoch": 77.02257474069555, | |
| "grad_norm": 2.9897961616516113, | |
| "learning_rate": 9.718032786885246e-05, | |
| "loss": 0.2891, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 77.12019524100062, | |
| "grad_norm": 4.264781951904297, | |
| "learning_rate": 9.704918032786886e-05, | |
| "loss": 0.2677, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 77.21781574130567, | |
| "grad_norm": 3.616536855697632, | |
| "learning_rate": 9.691803278688525e-05, | |
| "loss": 0.2282, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 77.31543624161074, | |
| "grad_norm": 3.4226481914520264, | |
| "learning_rate": 9.678688524590165e-05, | |
| "loss": 0.2792, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 77.4130567419158, | |
| "grad_norm": 3.012357473373413, | |
| "learning_rate": 9.665573770491804e-05, | |
| "loss": 0.2644, | |
| "step": 15860 | |
| }, | |
| { | |
| "epoch": 77.51067724222086, | |
| "grad_norm": 3.582298517227173, | |
| "learning_rate": 9.652459016393443e-05, | |
| "loss": 0.2944, | |
| "step": 15880 | |
| }, | |
| { | |
| "epoch": 77.60829774252593, | |
| "grad_norm": 4.138154983520508, | |
| "learning_rate": 9.639344262295082e-05, | |
| "loss": 0.3097, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 77.70591824283099, | |
| "grad_norm": 4.919800281524658, | |
| "learning_rate": 9.626229508196721e-05, | |
| "loss": 0.2666, | |
| "step": 15920 | |
| }, | |
| { | |
| "epoch": 77.80353874313606, | |
| "grad_norm": 3.3038241863250732, | |
| "learning_rate": 9.613114754098361e-05, | |
| "loss": 0.2909, | |
| "step": 15940 | |
| }, | |
| { | |
| "epoch": 77.90115924344113, | |
| "grad_norm": 2.6817522048950195, | |
| "learning_rate": 9.6e-05, | |
| "loss": 0.2565, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 77.99877974374618, | |
| "grad_norm": 4.992214679718018, | |
| "learning_rate": 9.58688524590164e-05, | |
| "loss": 0.2767, | |
| "step": 15980 | |
| }, | |
| { | |
| "epoch": 78.09640024405125, | |
| "grad_norm": 3.589238166809082, | |
| "learning_rate": 9.57377049180328e-05, | |
| "loss": 0.2483, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 78.19402074435631, | |
| "grad_norm": 4.856943607330322, | |
| "learning_rate": 9.560655737704918e-05, | |
| "loss": 0.2698, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 78.29164124466138, | |
| "grad_norm": 3.963756561279297, | |
| "learning_rate": 9.547540983606557e-05, | |
| "loss": 0.2522, | |
| "step": 16040 | |
| }, | |
| { | |
| "epoch": 78.38926174496645, | |
| "grad_norm": 3.843501567840576, | |
| "learning_rate": 9.534426229508198e-05, | |
| "loss": 0.2502, | |
| "step": 16060 | |
| }, | |
| { | |
| "epoch": 78.4868822452715, | |
| "grad_norm": 3.1323421001434326, | |
| "learning_rate": 9.521311475409837e-05, | |
| "loss": 0.2693, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 78.58450274557657, | |
| "grad_norm": 3.3395233154296875, | |
| "learning_rate": 9.508196721311476e-05, | |
| "loss": 0.2806, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 78.68212324588164, | |
| "grad_norm": 3.2387075424194336, | |
| "learning_rate": 9.495081967213116e-05, | |
| "loss": 0.266, | |
| "step": 16120 | |
| }, | |
| { | |
| "epoch": 78.7797437461867, | |
| "grad_norm": 3.9725232124328613, | |
| "learning_rate": 9.481967213114755e-05, | |
| "loss": 0.2753, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 78.87736424649177, | |
| "grad_norm": 3.88059401512146, | |
| "learning_rate": 9.468852459016394e-05, | |
| "loss": 0.3028, | |
| "step": 16160 | |
| }, | |
| { | |
| "epoch": 78.97498474679682, | |
| "grad_norm": 4.623359680175781, | |
| "learning_rate": 9.455737704918033e-05, | |
| "loss": 0.2816, | |
| "step": 16180 | |
| }, | |
| { | |
| "epoch": 79.07260524710189, | |
| "grad_norm": 3.237917423248291, | |
| "learning_rate": 9.442622950819673e-05, | |
| "loss": 0.2545, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 79.17022574740696, | |
| "grad_norm": 3.4442942142486572, | |
| "learning_rate": 9.429508196721312e-05, | |
| "loss": 0.2596, | |
| "step": 16220 | |
| }, | |
| { | |
| "epoch": 79.26784624771201, | |
| "grad_norm": 4.398017406463623, | |
| "learning_rate": 9.416393442622952e-05, | |
| "loss": 0.2731, | |
| "step": 16240 | |
| }, | |
| { | |
| "epoch": 79.36546674801708, | |
| "grad_norm": 3.892063856124878, | |
| "learning_rate": 9.403278688524591e-05, | |
| "loss": 0.266, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 79.46308724832215, | |
| "grad_norm": 4.046624183654785, | |
| "learning_rate": 9.39016393442623e-05, | |
| "loss": 0.2368, | |
| "step": 16280 | |
| }, | |
| { | |
| "epoch": 79.56070774862721, | |
| "grad_norm": 5.123999118804932, | |
| "learning_rate": 9.377049180327869e-05, | |
| "loss": 0.2409, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 79.65832824893228, | |
| "grad_norm": 3.1601643562316895, | |
| "learning_rate": 9.363934426229508e-05, | |
| "loss": 0.2525, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 79.75594874923733, | |
| "grad_norm": 4.788114070892334, | |
| "learning_rate": 9.350819672131148e-05, | |
| "loss": 0.2998, | |
| "step": 16340 | |
| }, | |
| { | |
| "epoch": 79.8535692495424, | |
| "grad_norm": 3.9707436561584473, | |
| "learning_rate": 9.337704918032787e-05, | |
| "loss": 0.2724, | |
| "step": 16360 | |
| }, | |
| { | |
| "epoch": 79.95118974984747, | |
| "grad_norm": 3.7388081550598145, | |
| "learning_rate": 9.324590163934427e-05, | |
| "loss": 0.2823, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 80.04881025015253, | |
| "grad_norm": 2.7105419635772705, | |
| "learning_rate": 9.311475409836066e-05, | |
| "loss": 0.2602, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 80.1464307504576, | |
| "grad_norm": 3.460477113723755, | |
| "learning_rate": 9.298360655737705e-05, | |
| "loss": 0.2428, | |
| "step": 16420 | |
| }, | |
| { | |
| "epoch": 80.24405125076267, | |
| "grad_norm": 3.526585817337036, | |
| "learning_rate": 9.285245901639344e-05, | |
| "loss": 0.2982, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 80.34167175106772, | |
| "grad_norm": 3.746425151824951, | |
| "learning_rate": 9.272131147540985e-05, | |
| "loss": 0.2365, | |
| "step": 16460 | |
| }, | |
| { | |
| "epoch": 80.43929225137279, | |
| "grad_norm": 3.2804489135742188, | |
| "learning_rate": 9.259016393442623e-05, | |
| "loss": 0.2832, | |
| "step": 16480 | |
| }, | |
| { | |
| "epoch": 80.53691275167785, | |
| "grad_norm": 3.136016607284546, | |
| "learning_rate": 9.245901639344264e-05, | |
| "loss": 0.2744, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 80.63453325198292, | |
| "grad_norm": 4.361492156982422, | |
| "learning_rate": 9.232786885245903e-05, | |
| "loss": 0.2726, | |
| "step": 16520 | |
| }, | |
| { | |
| "epoch": 80.73215375228799, | |
| "grad_norm": 3.3838353157043457, | |
| "learning_rate": 9.21967213114754e-05, | |
| "loss": 0.2651, | |
| "step": 16540 | |
| }, | |
| { | |
| "epoch": 80.82977425259304, | |
| "grad_norm": 3.8092405796051025, | |
| "learning_rate": 9.20655737704918e-05, | |
| "loss": 0.2519, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 80.92739475289811, | |
| "grad_norm": 4.455862522125244, | |
| "learning_rate": 9.19344262295082e-05, | |
| "loss": 0.2518, | |
| "step": 16580 | |
| }, | |
| { | |
| "epoch": 81.02501525320318, | |
| "grad_norm": 3.073539972305298, | |
| "learning_rate": 9.18032786885246e-05, | |
| "loss": 0.272, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 81.12263575350823, | |
| "grad_norm": 4.361839294433594, | |
| "learning_rate": 9.167213114754099e-05, | |
| "loss": 0.2468, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 81.2202562538133, | |
| "grad_norm": 5.322413444519043, | |
| "learning_rate": 9.154098360655739e-05, | |
| "loss": 0.2339, | |
| "step": 16640 | |
| }, | |
| { | |
| "epoch": 81.31787675411836, | |
| "grad_norm": 4.6976704597473145, | |
| "learning_rate": 9.140983606557378e-05, | |
| "loss": 0.27, | |
| "step": 16660 | |
| }, | |
| { | |
| "epoch": 81.41549725442343, | |
| "grad_norm": 3.608119487762451, | |
| "learning_rate": 9.127868852459017e-05, | |
| "loss": 0.2615, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 81.5131177547285, | |
| "grad_norm": 3.837738513946533, | |
| "learning_rate": 9.114754098360656e-05, | |
| "loss": 0.2534, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 81.61073825503355, | |
| "grad_norm": 3.750638723373413, | |
| "learning_rate": 9.101639344262296e-05, | |
| "loss": 0.2533, | |
| "step": 16720 | |
| }, | |
| { | |
| "epoch": 81.70835875533862, | |
| "grad_norm": 3.916907548904419, | |
| "learning_rate": 9.088524590163935e-05, | |
| "loss": 0.2847, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 81.80597925564369, | |
| "grad_norm": 3.8545727729797363, | |
| "learning_rate": 9.075409836065574e-05, | |
| "loss": 0.2554, | |
| "step": 16760 | |
| }, | |
| { | |
| "epoch": 81.90359975594875, | |
| "grad_norm": 2.8878276348114014, | |
| "learning_rate": 9.062295081967214e-05, | |
| "loss": 0.2793, | |
| "step": 16780 | |
| }, | |
| { | |
| "epoch": 82.00122025625382, | |
| "grad_norm": 3.6857919692993164, | |
| "learning_rate": 9.049180327868852e-05, | |
| "loss": 0.2703, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 82.09884075655887, | |
| "grad_norm": 3.670179605484009, | |
| "learning_rate": 9.036065573770492e-05, | |
| "loss": 0.2641, | |
| "step": 16820 | |
| }, | |
| { | |
| "epoch": 82.19646125686394, | |
| "grad_norm": 3.4155802726745605, | |
| "learning_rate": 9.022950819672131e-05, | |
| "loss": 0.2528, | |
| "step": 16840 | |
| }, | |
| { | |
| "epoch": 82.29408175716901, | |
| "grad_norm": 3.139631986618042, | |
| "learning_rate": 9.009836065573771e-05, | |
| "loss": 0.2485, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 82.39170225747407, | |
| "grad_norm": 3.919480800628662, | |
| "learning_rate": 8.99672131147541e-05, | |
| "loss": 0.2586, | |
| "step": 16880 | |
| }, | |
| { | |
| "epoch": 82.48932275777914, | |
| "grad_norm": 4.068678379058838, | |
| "learning_rate": 8.98360655737705e-05, | |
| "loss": 0.2836, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 82.5869432580842, | |
| "grad_norm": 3.544682502746582, | |
| "learning_rate": 8.97049180327869e-05, | |
| "loss": 0.2515, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 82.68456375838926, | |
| "grad_norm": 3.8421828746795654, | |
| "learning_rate": 8.957377049180328e-05, | |
| "loss": 0.2654, | |
| "step": 16940 | |
| }, | |
| { | |
| "epoch": 82.78218425869433, | |
| "grad_norm": 3.0508570671081543, | |
| "learning_rate": 8.944262295081967e-05, | |
| "loss": 0.2742, | |
| "step": 16960 | |
| }, | |
| { | |
| "epoch": 82.87980475899938, | |
| "grad_norm": 3.601579427719116, | |
| "learning_rate": 8.931147540983606e-05, | |
| "loss": 0.2542, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 82.97742525930445, | |
| "grad_norm": 3.657724618911743, | |
| "learning_rate": 8.918032786885247e-05, | |
| "loss": 0.2253, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 83.07504575960952, | |
| "grad_norm": 5.093318462371826, | |
| "learning_rate": 8.904918032786886e-05, | |
| "loss": 0.2429, | |
| "step": 17020 | |
| }, | |
| { | |
| "epoch": 83.17266625991458, | |
| "grad_norm": 3.160510301589966, | |
| "learning_rate": 8.891803278688526e-05, | |
| "loss": 0.2416, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 83.27028676021965, | |
| "grad_norm": 3.9046666622161865, | |
| "learning_rate": 8.878688524590163e-05, | |
| "loss": 0.2526, | |
| "step": 17060 | |
| }, | |
| { | |
| "epoch": 83.36790726052472, | |
| "grad_norm": 4.196876525878906, | |
| "learning_rate": 8.865573770491804e-05, | |
| "loss": 0.2518, | |
| "step": 17080 | |
| }, | |
| { | |
| "epoch": 83.46552776082977, | |
| "grad_norm": 3.5083820819854736, | |
| "learning_rate": 8.852459016393443e-05, | |
| "loss": 0.2392, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 83.56314826113484, | |
| "grad_norm": 3.099393606185913, | |
| "learning_rate": 8.839344262295083e-05, | |
| "loss": 0.2783, | |
| "step": 17120 | |
| }, | |
| { | |
| "epoch": 83.6607687614399, | |
| "grad_norm": 3.532540798187256, | |
| "learning_rate": 8.826229508196722e-05, | |
| "loss": 0.261, | |
| "step": 17140 | |
| }, | |
| { | |
| "epoch": 83.75838926174497, | |
| "grad_norm": 3.2548046112060547, | |
| "learning_rate": 8.813114754098362e-05, | |
| "loss": 0.2622, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 83.85600976205004, | |
| "grad_norm": 4.520061492919922, | |
| "learning_rate": 8.800000000000001e-05, | |
| "loss": 0.272, | |
| "step": 17180 | |
| }, | |
| { | |
| "epoch": 83.95363026235509, | |
| "grad_norm": 3.1347196102142334, | |
| "learning_rate": 8.786885245901639e-05, | |
| "loss": 0.2504, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 84.05125076266016, | |
| "grad_norm": 3.6954307556152344, | |
| "learning_rate": 8.773770491803279e-05, | |
| "loss": 0.2566, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 84.14887126296523, | |
| "grad_norm": 4.145720481872559, | |
| "learning_rate": 8.760655737704918e-05, | |
| "loss": 0.2393, | |
| "step": 17240 | |
| }, | |
| { | |
| "epoch": 84.24649176327028, | |
| "grad_norm": 3.575308084487915, | |
| "learning_rate": 8.747540983606558e-05, | |
| "loss": 0.2667, | |
| "step": 17260 | |
| }, | |
| { | |
| "epoch": 84.34411226357535, | |
| "grad_norm": 4.094547271728516, | |
| "learning_rate": 8.734426229508197e-05, | |
| "loss": 0.244, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 84.44173276388041, | |
| "grad_norm": 3.583008050918579, | |
| "learning_rate": 8.721311475409837e-05, | |
| "loss": 0.2262, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 84.53935326418548, | |
| "grad_norm": 4.159909248352051, | |
| "learning_rate": 8.708196721311475e-05, | |
| "loss": 0.2625, | |
| "step": 17320 | |
| }, | |
| { | |
| "epoch": 84.63697376449055, | |
| "grad_norm": 3.5979392528533936, | |
| "learning_rate": 8.695081967213115e-05, | |
| "loss": 0.2421, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 84.7345942647956, | |
| "grad_norm": 2.0888795852661133, | |
| "learning_rate": 8.681967213114754e-05, | |
| "loss": 0.2555, | |
| "step": 17360 | |
| }, | |
| { | |
| "epoch": 84.83221476510067, | |
| "grad_norm": 3.8699913024902344, | |
| "learning_rate": 8.668852459016393e-05, | |
| "loss": 0.2733, | |
| "step": 17380 | |
| }, | |
| { | |
| "epoch": 84.92983526540573, | |
| "grad_norm": 3.1710896492004395, | |
| "learning_rate": 8.655737704918033e-05, | |
| "loss": 0.2824, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 85.0274557657108, | |
| "grad_norm": 3.3995399475097656, | |
| "learning_rate": 8.642622950819672e-05, | |
| "loss": 0.2555, | |
| "step": 17420 | |
| }, | |
| { | |
| "epoch": 85.12507626601587, | |
| "grad_norm": 4.362491130828857, | |
| "learning_rate": 8.629508196721313e-05, | |
| "loss": 0.2535, | |
| "step": 17440 | |
| }, | |
| { | |
| "epoch": 85.22269676632092, | |
| "grad_norm": 3.141589641571045, | |
| "learning_rate": 8.61639344262295e-05, | |
| "loss": 0.2368, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 85.32031726662599, | |
| "grad_norm": 2.7880699634552, | |
| "learning_rate": 8.60327868852459e-05, | |
| "loss": 0.2382, | |
| "step": 17480 | |
| }, | |
| { | |
| "epoch": 85.41793776693106, | |
| "grad_norm": 3.9995205402374268, | |
| "learning_rate": 8.59016393442623e-05, | |
| "loss": 0.2484, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 85.51555826723612, | |
| "grad_norm": 4.39112663269043, | |
| "learning_rate": 8.57704918032787e-05, | |
| "loss": 0.254, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 85.61317876754119, | |
| "grad_norm": 3.7383909225463867, | |
| "learning_rate": 8.563934426229509e-05, | |
| "loss": 0.2685, | |
| "step": 17540 | |
| }, | |
| { | |
| "epoch": 85.71079926784624, | |
| "grad_norm": 2.9596993923187256, | |
| "learning_rate": 8.550819672131149e-05, | |
| "loss": 0.2725, | |
| "step": 17560 | |
| }, | |
| { | |
| "epoch": 85.80841976815131, | |
| "grad_norm": 3.4363205432891846, | |
| "learning_rate": 8.537704918032787e-05, | |
| "loss": 0.2518, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 85.90604026845638, | |
| "grad_norm": 4.1404709815979, | |
| "learning_rate": 8.524590163934426e-05, | |
| "loss": 0.2409, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 86.00366076876143, | |
| "grad_norm": 3.080606460571289, | |
| "learning_rate": 8.511475409836066e-05, | |
| "loss": 0.2646, | |
| "step": 17620 | |
| }, | |
| { | |
| "epoch": 86.1012812690665, | |
| "grad_norm": 4.42800760269165, | |
| "learning_rate": 8.498360655737705e-05, | |
| "loss": 0.2274, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 86.19890176937157, | |
| "grad_norm": 3.3812572956085205, | |
| "learning_rate": 8.485245901639345e-05, | |
| "loss": 0.2165, | |
| "step": 17660 | |
| }, | |
| { | |
| "epoch": 86.29652226967663, | |
| "grad_norm": 3.873788833618164, | |
| "learning_rate": 8.472131147540984e-05, | |
| "loss": 0.2464, | |
| "step": 17680 | |
| }, | |
| { | |
| "epoch": 86.3941427699817, | |
| "grad_norm": 4.275656700134277, | |
| "learning_rate": 8.459016393442624e-05, | |
| "loss": 0.2236, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 86.49176327028675, | |
| "grad_norm": 3.0242347717285156, | |
| "learning_rate": 8.445901639344262e-05, | |
| "loss": 0.2518, | |
| "step": 17720 | |
| }, | |
| { | |
| "epoch": 86.58938377059182, | |
| "grad_norm": 3.052992105484009, | |
| "learning_rate": 8.432786885245902e-05, | |
| "loss": 0.2659, | |
| "step": 17740 | |
| }, | |
| { | |
| "epoch": 86.68700427089689, | |
| "grad_norm": 5.510742664337158, | |
| "learning_rate": 8.419672131147541e-05, | |
| "loss": 0.269, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 86.78462477120195, | |
| "grad_norm": 3.8788983821868896, | |
| "learning_rate": 8.406557377049181e-05, | |
| "loss": 0.2548, | |
| "step": 17780 | |
| }, | |
| { | |
| "epoch": 86.88224527150702, | |
| "grad_norm": 4.494417190551758, | |
| "learning_rate": 8.39344262295082e-05, | |
| "loss": 0.2558, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 86.97986577181209, | |
| "grad_norm": 4.6547040939331055, | |
| "learning_rate": 8.380327868852459e-05, | |
| "loss": 0.2775, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 87.07748627211714, | |
| "grad_norm": 3.511051654815674, | |
| "learning_rate": 8.367213114754098e-05, | |
| "loss": 0.2233, | |
| "step": 17840 | |
| }, | |
| { | |
| "epoch": 87.17510677242221, | |
| "grad_norm": 4.116011142730713, | |
| "learning_rate": 8.354098360655737e-05, | |
| "loss": 0.2246, | |
| "step": 17860 | |
| }, | |
| { | |
| "epoch": 87.27272727272727, | |
| "grad_norm": 3.525118589401245, | |
| "learning_rate": 8.340983606557377e-05, | |
| "loss": 0.2058, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 87.37034777303234, | |
| "grad_norm": 3.5380094051361084, | |
| "learning_rate": 8.327868852459016e-05, | |
| "loss": 0.2467, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 87.4679682733374, | |
| "grad_norm": 3.6875052452087402, | |
| "learning_rate": 8.314754098360657e-05, | |
| "loss": 0.2967, | |
| "step": 17920 | |
| }, | |
| { | |
| "epoch": 87.56558877364246, | |
| "grad_norm": 3.565765142440796, | |
| "learning_rate": 8.301639344262296e-05, | |
| "loss": 0.2434, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 87.66320927394753, | |
| "grad_norm": 3.3109848499298096, | |
| "learning_rate": 8.288524590163935e-05, | |
| "loss": 0.2538, | |
| "step": 17960 | |
| }, | |
| { | |
| "epoch": 87.7608297742526, | |
| "grad_norm": 3.278052568435669, | |
| "learning_rate": 8.275409836065573e-05, | |
| "loss": 0.2739, | |
| "step": 17980 | |
| }, | |
| { | |
| "epoch": 87.85845027455765, | |
| "grad_norm": 5.82271671295166, | |
| "learning_rate": 8.262295081967214e-05, | |
| "loss": 0.244, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 87.95607077486272, | |
| "grad_norm": 4.266513824462891, | |
| "learning_rate": 8.249180327868853e-05, | |
| "loss": 0.2546, | |
| "step": 18020 | |
| }, | |
| { | |
| "epoch": 88.05369127516778, | |
| "grad_norm": 3.985555648803711, | |
| "learning_rate": 8.236065573770492e-05, | |
| "loss": 0.2673, | |
| "step": 18040 | |
| }, | |
| { | |
| "epoch": 88.15131177547285, | |
| "grad_norm": 3.4252424240112305, | |
| "learning_rate": 8.222950819672132e-05, | |
| "loss": 0.2308, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 88.24893227577792, | |
| "grad_norm": 3.1381208896636963, | |
| "learning_rate": 8.209836065573771e-05, | |
| "loss": 0.24, | |
| "step": 18080 | |
| }, | |
| { | |
| "epoch": 88.34655277608297, | |
| "grad_norm": 4.053617000579834, | |
| "learning_rate": 8.19672131147541e-05, | |
| "loss": 0.2646, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 88.44417327638804, | |
| "grad_norm": 3.525423288345337, | |
| "learning_rate": 8.183606557377049e-05, | |
| "loss": 0.2535, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 88.54179377669311, | |
| "grad_norm": 3.593766212463379, | |
| "learning_rate": 8.170491803278689e-05, | |
| "loss": 0.2426, | |
| "step": 18140 | |
| }, | |
| { | |
| "epoch": 88.63941427699817, | |
| "grad_norm": 4.087725639343262, | |
| "learning_rate": 8.157377049180328e-05, | |
| "loss": 0.2196, | |
| "step": 18160 | |
| }, | |
| { | |
| "epoch": 88.73703477730324, | |
| "grad_norm": 4.209127902984619, | |
| "learning_rate": 8.144262295081968e-05, | |
| "loss": 0.2593, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 88.83465527760829, | |
| "grad_norm": 3.9146687984466553, | |
| "learning_rate": 8.131147540983607e-05, | |
| "loss": 0.2492, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 88.93227577791336, | |
| "grad_norm": 3.4198620319366455, | |
| "learning_rate": 8.118032786885246e-05, | |
| "loss": 0.2569, | |
| "step": 18220 | |
| }, | |
| { | |
| "epoch": 89.02989627821843, | |
| "grad_norm": 3.5384342670440674, | |
| "learning_rate": 8.104918032786885e-05, | |
| "loss": 0.2298, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 89.12751677852349, | |
| "grad_norm": 3.252002239227295, | |
| "learning_rate": 8.091803278688524e-05, | |
| "loss": 0.2296, | |
| "step": 18260 | |
| }, | |
| { | |
| "epoch": 89.22513727882856, | |
| "grad_norm": 3.5200119018554688, | |
| "learning_rate": 8.078688524590164e-05, | |
| "loss": 0.2476, | |
| "step": 18280 | |
| }, | |
| { | |
| "epoch": 89.32275777913362, | |
| "grad_norm": 4.229335784912109, | |
| "learning_rate": 8.065573770491803e-05, | |
| "loss": 0.2427, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 89.42037827943868, | |
| "grad_norm": 3.418846368789673, | |
| "learning_rate": 8.052459016393444e-05, | |
| "loss": 0.248, | |
| "step": 18320 | |
| }, | |
| { | |
| "epoch": 89.51799877974375, | |
| "grad_norm": 4.674842357635498, | |
| "learning_rate": 8.039344262295082e-05, | |
| "loss": 0.2388, | |
| "step": 18340 | |
| }, | |
| { | |
| "epoch": 89.6156192800488, | |
| "grad_norm": 2.8320224285125732, | |
| "learning_rate": 8.026229508196721e-05, | |
| "loss": 0.2737, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 89.71323978035387, | |
| "grad_norm": 3.764437198638916, | |
| "learning_rate": 8.01311475409836e-05, | |
| "loss": 0.2217, | |
| "step": 18380 | |
| }, | |
| { | |
| "epoch": 89.81086028065894, | |
| "grad_norm": 3.7620530128479004, | |
| "learning_rate": 8e-05, | |
| "loss": 0.2339, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 89.908480780964, | |
| "grad_norm": 3.451263904571533, | |
| "learning_rate": 7.98688524590164e-05, | |
| "loss": 0.239, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 90.00610128126907, | |
| "grad_norm": 3.327611207962036, | |
| "learning_rate": 7.97377049180328e-05, | |
| "loss": 0.2509, | |
| "step": 18440 | |
| }, | |
| { | |
| "epoch": 90.10372178157414, | |
| "grad_norm": 3.0182056427001953, | |
| "learning_rate": 7.960655737704919e-05, | |
| "loss": 0.2173, | |
| "step": 18460 | |
| }, | |
| { | |
| "epoch": 90.20134228187919, | |
| "grad_norm": 4.298709869384766, | |
| "learning_rate": 7.947540983606558e-05, | |
| "loss": 0.2371, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 90.29896278218426, | |
| "grad_norm": 6.2249979972839355, | |
| "learning_rate": 7.934426229508197e-05, | |
| "loss": 0.2504, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 90.39658328248932, | |
| "grad_norm": 3.3702869415283203, | |
| "learning_rate": 7.921311475409836e-05, | |
| "loss": 0.2264, | |
| "step": 18520 | |
| }, | |
| { | |
| "epoch": 90.49420378279439, | |
| "grad_norm": 4.575166702270508, | |
| "learning_rate": 7.908196721311476e-05, | |
| "loss": 0.2296, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 90.59182428309946, | |
| "grad_norm": 3.4049549102783203, | |
| "learning_rate": 7.895081967213115e-05, | |
| "loss": 0.2488, | |
| "step": 18560 | |
| }, | |
| { | |
| "epoch": 90.68944478340451, | |
| "grad_norm": 3.5313448905944824, | |
| "learning_rate": 7.881967213114755e-05, | |
| "loss": 0.2414, | |
| "step": 18580 | |
| }, | |
| { | |
| "epoch": 90.78706528370958, | |
| "grad_norm": 3.3291146755218506, | |
| "learning_rate": 7.868852459016394e-05, | |
| "loss": 0.225, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 90.88468578401465, | |
| "grad_norm": 2.99589204788208, | |
| "learning_rate": 7.855737704918033e-05, | |
| "loss": 0.2548, | |
| "step": 18620 | |
| }, | |
| { | |
| "epoch": 90.9823062843197, | |
| "grad_norm": 3.2335972785949707, | |
| "learning_rate": 7.842622950819672e-05, | |
| "loss": 0.269, | |
| "step": 18640 | |
| }, | |
| { | |
| "epoch": 91.07992678462477, | |
| "grad_norm": 4.912237167358398, | |
| "learning_rate": 7.829508196721311e-05, | |
| "loss": 0.2392, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 91.17754728492983, | |
| "grad_norm": 4.516569137573242, | |
| "learning_rate": 7.816393442622951e-05, | |
| "loss": 0.2213, | |
| "step": 18680 | |
| }, | |
| { | |
| "epoch": 91.2751677852349, | |
| "grad_norm": 3.225470542907715, | |
| "learning_rate": 7.80327868852459e-05, | |
| "loss": 0.2414, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 91.37278828553997, | |
| "grad_norm": 3.231811761856079, | |
| "learning_rate": 7.79016393442623e-05, | |
| "loss": 0.2384, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 91.47040878584502, | |
| "grad_norm": 3.199504852294922, | |
| "learning_rate": 7.77704918032787e-05, | |
| "loss": 0.2336, | |
| "step": 18740 | |
| }, | |
| { | |
| "epoch": 91.5680292861501, | |
| "grad_norm": 3.4679319858551025, | |
| "learning_rate": 7.763934426229508e-05, | |
| "loss": 0.2136, | |
| "step": 18760 | |
| }, | |
| { | |
| "epoch": 91.66564978645516, | |
| "grad_norm": 4.474179267883301, | |
| "learning_rate": 7.750819672131147e-05, | |
| "loss": 0.2454, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 91.76327028676022, | |
| "grad_norm": 4.39286994934082, | |
| "learning_rate": 7.737704918032788e-05, | |
| "loss": 0.2377, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 91.86089078706529, | |
| "grad_norm": 2.924795627593994, | |
| "learning_rate": 7.724590163934426e-05, | |
| "loss": 0.2475, | |
| "step": 18820 | |
| }, | |
| { | |
| "epoch": 91.95851128737034, | |
| "grad_norm": 3.4778449535369873, | |
| "learning_rate": 7.711475409836067e-05, | |
| "loss": 0.2712, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 92.05613178767541, | |
| "grad_norm": 5.152170181274414, | |
| "learning_rate": 7.698360655737706e-05, | |
| "loss": 0.2336, | |
| "step": 18860 | |
| }, | |
| { | |
| "epoch": 92.15375228798048, | |
| "grad_norm": 2.8238580226898193, | |
| "learning_rate": 7.685245901639345e-05, | |
| "loss": 0.2504, | |
| "step": 18880 | |
| }, | |
| { | |
| "epoch": 92.25137278828554, | |
| "grad_norm": 3.870866537094116, | |
| "learning_rate": 7.672131147540984e-05, | |
| "loss": 0.25, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 92.3489932885906, | |
| "grad_norm": 5.052824020385742, | |
| "learning_rate": 7.659016393442622e-05, | |
| "loss": 0.2517, | |
| "step": 18920 | |
| }, | |
| { | |
| "epoch": 92.44661378889568, | |
| "grad_norm": 3.048907518386841, | |
| "learning_rate": 7.645901639344263e-05, | |
| "loss": 0.2347, | |
| "step": 18940 | |
| }, | |
| { | |
| "epoch": 92.54423428920073, | |
| "grad_norm": 3.2473690509796143, | |
| "learning_rate": 7.632786885245902e-05, | |
| "loss": 0.2362, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 92.6418547895058, | |
| "grad_norm": 4.635523319244385, | |
| "learning_rate": 7.619672131147542e-05, | |
| "loss": 0.2274, | |
| "step": 18980 | |
| }, | |
| { | |
| "epoch": 92.73947528981085, | |
| "grad_norm": 2.6211893558502197, | |
| "learning_rate": 7.606557377049181e-05, | |
| "loss": 0.2258, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 92.83709579011592, | |
| "grad_norm": 3.725900888442993, | |
| "learning_rate": 7.59344262295082e-05, | |
| "loss": 0.2407, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 92.934716290421, | |
| "grad_norm": 4.358538627624512, | |
| "learning_rate": 7.580327868852459e-05, | |
| "loss": 0.2268, | |
| "step": 19040 | |
| }, | |
| { | |
| "epoch": 93.03233679072605, | |
| "grad_norm": 3.608492851257324, | |
| "learning_rate": 7.567213114754099e-05, | |
| "loss": 0.2114, | |
| "step": 19060 | |
| }, | |
| { | |
| "epoch": 93.12995729103112, | |
| "grad_norm": 3.455113410949707, | |
| "learning_rate": 7.554098360655738e-05, | |
| "loss": 0.2194, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 93.22757779133617, | |
| "grad_norm": 3.443164825439453, | |
| "learning_rate": 7.540983606557377e-05, | |
| "loss": 0.2305, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 93.32519829164124, | |
| "grad_norm": 3.579775810241699, | |
| "learning_rate": 7.527868852459017e-05, | |
| "loss": 0.2358, | |
| "step": 19120 | |
| }, | |
| { | |
| "epoch": 93.42281879194631, | |
| "grad_norm": 2.8307318687438965, | |
| "learning_rate": 7.514754098360656e-05, | |
| "loss": 0.2289, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 93.52043929225137, | |
| "grad_norm": 4.2809834480285645, | |
| "learning_rate": 7.501639344262295e-05, | |
| "loss": 0.2246, | |
| "step": 19160 | |
| }, | |
| { | |
| "epoch": 93.61805979255644, | |
| "grad_norm": 4.093430042266846, | |
| "learning_rate": 7.488524590163934e-05, | |
| "loss": 0.2416, | |
| "step": 19180 | |
| }, | |
| { | |
| "epoch": 93.7156802928615, | |
| "grad_norm": 3.562998056411743, | |
| "learning_rate": 7.475409836065574e-05, | |
| "loss": 0.2453, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 93.81330079316656, | |
| "grad_norm": 3.611504554748535, | |
| "learning_rate": 7.462295081967213e-05, | |
| "loss": 0.2389, | |
| "step": 19220 | |
| }, | |
| { | |
| "epoch": 93.91092129347163, | |
| "grad_norm": 3.638408899307251, | |
| "learning_rate": 7.449180327868854e-05, | |
| "loss": 0.2354, | |
| "step": 19240 | |
| }, | |
| { | |
| "epoch": 94.00854179377669, | |
| "grad_norm": 2.646662473678589, | |
| "learning_rate": 7.436065573770493e-05, | |
| "loss": 0.2588, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 94.10616229408176, | |
| "grad_norm": 2.8512191772460938, | |
| "learning_rate": 7.422950819672131e-05, | |
| "loss": 0.2219, | |
| "step": 19280 | |
| }, | |
| { | |
| "epoch": 94.20378279438683, | |
| "grad_norm": 4.569155693054199, | |
| "learning_rate": 7.40983606557377e-05, | |
| "loss": 0.2151, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 94.30140329469188, | |
| "grad_norm": 3.036120653152466, | |
| "learning_rate": 7.39672131147541e-05, | |
| "loss": 0.2133, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 94.39902379499695, | |
| "grad_norm": 3.7911667823791504, | |
| "learning_rate": 7.38360655737705e-05, | |
| "loss": 0.206, | |
| "step": 19340 | |
| }, | |
| { | |
| "epoch": 94.49664429530202, | |
| "grad_norm": 3.4659411907196045, | |
| "learning_rate": 7.370491803278689e-05, | |
| "loss": 0.2175, | |
| "step": 19360 | |
| }, | |
| { | |
| "epoch": 94.59426479560707, | |
| "grad_norm": 3.7196578979492188, | |
| "learning_rate": 7.357377049180329e-05, | |
| "loss": 0.2508, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 94.69188529591214, | |
| "grad_norm": 3.499638557434082, | |
| "learning_rate": 7.344262295081968e-05, | |
| "loss": 0.2595, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 94.7895057962172, | |
| "grad_norm": 3.3197853565216064, | |
| "learning_rate": 7.331147540983607e-05, | |
| "loss": 0.2498, | |
| "step": 19420 | |
| }, | |
| { | |
| "epoch": 94.88712629652227, | |
| "grad_norm": 3.819153308868408, | |
| "learning_rate": 7.318032786885246e-05, | |
| "loss": 0.24, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 94.98474679682734, | |
| "grad_norm": 3.229252815246582, | |
| "learning_rate": 7.304918032786886e-05, | |
| "loss": 0.2402, | |
| "step": 19460 | |
| }, | |
| { | |
| "epoch": 95.0823672971324, | |
| "grad_norm": 3.8045654296875, | |
| "learning_rate": 7.291803278688525e-05, | |
| "loss": 0.2235, | |
| "step": 19480 | |
| }, | |
| { | |
| "epoch": 95.17998779743746, | |
| "grad_norm": 3.8064322471618652, | |
| "learning_rate": 7.278688524590165e-05, | |
| "loss": 0.2374, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 95.27760829774253, | |
| "grad_norm": 2.9170637130737305, | |
| "learning_rate": 7.265573770491804e-05, | |
| "loss": 0.223, | |
| "step": 19520 | |
| }, | |
| { | |
| "epoch": 95.37522879804759, | |
| "grad_norm": 3.6795055866241455, | |
| "learning_rate": 7.252459016393443e-05, | |
| "loss": 0.2256, | |
| "step": 19540 | |
| }, | |
| { | |
| "epoch": 95.47284929835266, | |
| "grad_norm": 3.1766109466552734, | |
| "learning_rate": 7.239344262295082e-05, | |
| "loss": 0.2536, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 95.57046979865771, | |
| "grad_norm": 3.862264633178711, | |
| "learning_rate": 7.226229508196721e-05, | |
| "loss": 0.2339, | |
| "step": 19580 | |
| }, | |
| { | |
| "epoch": 95.66809029896278, | |
| "grad_norm": 4.323207855224609, | |
| "learning_rate": 7.213114754098361e-05, | |
| "loss": 0.2434, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 95.76571079926785, | |
| "grad_norm": 2.973966598510742, | |
| "learning_rate": 7.2e-05, | |
| "loss": 0.219, | |
| "step": 19620 | |
| }, | |
| { | |
| "epoch": 95.8633312995729, | |
| "grad_norm": 3.5967094898223877, | |
| "learning_rate": 7.18688524590164e-05, | |
| "loss": 0.243, | |
| "step": 19640 | |
| }, | |
| { | |
| "epoch": 95.96095179987798, | |
| "grad_norm": 3.993166923522949, | |
| "learning_rate": 7.17377049180328e-05, | |
| "loss": 0.2263, | |
| "step": 19660 | |
| }, | |
| { | |
| "epoch": 96.05857230018304, | |
| "grad_norm": 3.425034284591675, | |
| "learning_rate": 7.160655737704918e-05, | |
| "loss": 0.2158, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 96.1561928004881, | |
| "grad_norm": 3.4514410495758057, | |
| "learning_rate": 7.147540983606557e-05, | |
| "loss": 0.1976, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 96.25381330079317, | |
| "grad_norm": 5.355749607086182, | |
| "learning_rate": 7.134426229508198e-05, | |
| "loss": 0.221, | |
| "step": 19720 | |
| }, | |
| { | |
| "epoch": 96.35143380109822, | |
| "grad_norm": 3.296389102935791, | |
| "learning_rate": 7.121311475409837e-05, | |
| "loss": 0.2606, | |
| "step": 19740 | |
| }, | |
| { | |
| "epoch": 96.4490543014033, | |
| "grad_norm": 3.1039505004882812, | |
| "learning_rate": 7.108196721311475e-05, | |
| "loss": 0.2532, | |
| "step": 19760 | |
| }, | |
| { | |
| "epoch": 96.54667480170836, | |
| "grad_norm": 3.3837499618530273, | |
| "learning_rate": 7.095081967213116e-05, | |
| "loss": 0.2351, | |
| "step": 19780 | |
| }, | |
| { | |
| "epoch": 96.64429530201342, | |
| "grad_norm": 4.072084426879883, | |
| "learning_rate": 7.081967213114755e-05, | |
| "loss": 0.2214, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 96.74191580231849, | |
| "grad_norm": 3.669661283493042, | |
| "learning_rate": 7.068852459016394e-05, | |
| "loss": 0.2168, | |
| "step": 19820 | |
| }, | |
| { | |
| "epoch": 96.83953630262356, | |
| "grad_norm": 3.449476957321167, | |
| "learning_rate": 7.055737704918033e-05, | |
| "loss": 0.2256, | |
| "step": 19840 | |
| }, | |
| { | |
| "epoch": 96.93715680292861, | |
| "grad_norm": 3.7562010288238525, | |
| "learning_rate": 7.042622950819673e-05, | |
| "loss": 0.233, | |
| "step": 19860 | |
| }, | |
| { | |
| "epoch": 97.03477730323368, | |
| "grad_norm": 3.0482330322265625, | |
| "learning_rate": 7.029508196721312e-05, | |
| "loss": 0.2388, | |
| "step": 19880 | |
| }, | |
| { | |
| "epoch": 97.13239780353874, | |
| "grad_norm": 2.8242592811584473, | |
| "learning_rate": 7.016393442622952e-05, | |
| "loss": 0.247, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 97.2300183038438, | |
| "grad_norm": 3.9347472190856934, | |
| "learning_rate": 7.003278688524591e-05, | |
| "loss": 0.214, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 97.32763880414888, | |
| "grad_norm": 3.7674787044525146, | |
| "learning_rate": 6.99016393442623e-05, | |
| "loss": 0.2321, | |
| "step": 19940 | |
| }, | |
| { | |
| "epoch": 97.42525930445393, | |
| "grad_norm": 3.0752851963043213, | |
| "learning_rate": 6.977049180327869e-05, | |
| "loss": 0.228, | |
| "step": 19960 | |
| }, | |
| { | |
| "epoch": 97.522879804759, | |
| "grad_norm": 4.3538899421691895, | |
| "learning_rate": 6.963934426229508e-05, | |
| "loss": 0.2196, | |
| "step": 19980 | |
| }, | |
| { | |
| "epoch": 97.62050030506407, | |
| "grad_norm": 2.677072763442993, | |
| "learning_rate": 6.950819672131148e-05, | |
| "loss": 0.2519, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 97.71812080536913, | |
| "grad_norm": 3.771855592727661, | |
| "learning_rate": 6.937704918032787e-05, | |
| "loss": 0.1967, | |
| "step": 20020 | |
| }, | |
| { | |
| "epoch": 97.8157413056742, | |
| "grad_norm": 4.431488990783691, | |
| "learning_rate": 6.924590163934427e-05, | |
| "loss": 0.2112, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 97.91336180597925, | |
| "grad_norm": 3.970080852508545, | |
| "learning_rate": 6.911475409836066e-05, | |
| "loss": 0.2295, | |
| "step": 20060 | |
| }, | |
| { | |
| "epoch": 98.01098230628432, | |
| "grad_norm": 2.521176338195801, | |
| "learning_rate": 6.898360655737705e-05, | |
| "loss": 0.2495, | |
| "step": 20080 | |
| }, | |
| { | |
| "epoch": 98.10860280658939, | |
| "grad_norm": 3.8167226314544678, | |
| "learning_rate": 6.885245901639344e-05, | |
| "loss": 0.2123, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 98.20622330689444, | |
| "grad_norm": 3.245234727859497, | |
| "learning_rate": 6.872131147540984e-05, | |
| "loss": 0.229, | |
| "step": 20120 | |
| }, | |
| { | |
| "epoch": 98.30384380719951, | |
| "grad_norm": 3.270099401473999, | |
| "learning_rate": 6.859016393442623e-05, | |
| "loss": 0.1986, | |
| "step": 20140 | |
| }, | |
| { | |
| "epoch": 98.40146430750458, | |
| "grad_norm": 3.133777379989624, | |
| "learning_rate": 6.845901639344262e-05, | |
| "loss": 0.2407, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 98.49908480780964, | |
| "grad_norm": 4.500607490539551, | |
| "learning_rate": 6.832786885245903e-05, | |
| "loss": 0.223, | |
| "step": 20180 | |
| }, | |
| { | |
| "epoch": 98.59670530811471, | |
| "grad_norm": 3.4543895721435547, | |
| "learning_rate": 6.819672131147542e-05, | |
| "loss": 0.2373, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 98.69432580841976, | |
| "grad_norm": 3.2081384658813477, | |
| "learning_rate": 6.80655737704918e-05, | |
| "loss": 0.2262, | |
| "step": 20220 | |
| }, | |
| { | |
| "epoch": 98.79194630872483, | |
| "grad_norm": 4.9004926681518555, | |
| "learning_rate": 6.79344262295082e-05, | |
| "loss": 0.2335, | |
| "step": 20240 | |
| }, | |
| { | |
| "epoch": 98.8895668090299, | |
| "grad_norm": 3.9303877353668213, | |
| "learning_rate": 6.78032786885246e-05, | |
| "loss": 0.2061, | |
| "step": 20260 | |
| }, | |
| { | |
| "epoch": 98.98718730933496, | |
| "grad_norm": 3.722957134246826, | |
| "learning_rate": 6.767213114754099e-05, | |
| "loss": 0.2518, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 99.08480780964003, | |
| "grad_norm": 4.127925872802734, | |
| "learning_rate": 6.754098360655739e-05, | |
| "loss": 0.2133, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 99.1824283099451, | |
| "grad_norm": 4.3639726638793945, | |
| "learning_rate": 6.740983606557378e-05, | |
| "loss": 0.2119, | |
| "step": 20320 | |
| }, | |
| { | |
| "epoch": 99.28004881025015, | |
| "grad_norm": 3.288351058959961, | |
| "learning_rate": 6.727868852459017e-05, | |
| "loss": 0.2307, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 99.37766931055522, | |
| "grad_norm": 3.5033068656921387, | |
| "learning_rate": 6.714754098360656e-05, | |
| "loss": 0.2227, | |
| "step": 20360 | |
| }, | |
| { | |
| "epoch": 99.47528981086027, | |
| "grad_norm": 3.1117262840270996, | |
| "learning_rate": 6.701639344262295e-05, | |
| "loss": 0.2257, | |
| "step": 20380 | |
| }, | |
| { | |
| "epoch": 99.57291031116534, | |
| "grad_norm": 4.022121906280518, | |
| "learning_rate": 6.688524590163935e-05, | |
| "loss": 0.2336, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 99.67053081147041, | |
| "grad_norm": 3.4611172676086426, | |
| "learning_rate": 6.675409836065574e-05, | |
| "loss": 0.2192, | |
| "step": 20420 | |
| }, | |
| { | |
| "epoch": 99.76815131177547, | |
| "grad_norm": 3.3648264408111572, | |
| "learning_rate": 6.662295081967214e-05, | |
| "loss": 0.2482, | |
| "step": 20440 | |
| }, | |
| { | |
| "epoch": 99.86577181208054, | |
| "grad_norm": 3.8535125255584717, | |
| "learning_rate": 6.649180327868853e-05, | |
| "loss": 0.2314, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 99.96339231238561, | |
| "grad_norm": 3.8780901432037354, | |
| "learning_rate": 6.636065573770492e-05, | |
| "loss": 0.2203, | |
| "step": 20480 | |
| }, | |
| { | |
| "epoch": 100.06101281269066, | |
| "grad_norm": 3.479278326034546, | |
| "learning_rate": 6.622950819672131e-05, | |
| "loss": 0.2003, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 100.15863331299573, | |
| "grad_norm": 3.530697822570801, | |
| "learning_rate": 6.609836065573771e-05, | |
| "loss": 0.2084, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 100.25625381330079, | |
| "grad_norm": 3.3677327632904053, | |
| "learning_rate": 6.59672131147541e-05, | |
| "loss": 0.2394, | |
| "step": 20540 | |
| }, | |
| { | |
| "epoch": 100.35387431360586, | |
| "grad_norm": 2.9369733333587646, | |
| "learning_rate": 6.58360655737705e-05, | |
| "loss": 0.2553, | |
| "step": 20560 | |
| }, | |
| { | |
| "epoch": 100.45149481391093, | |
| "grad_norm": 3.9915401935577393, | |
| "learning_rate": 6.57049180327869e-05, | |
| "loss": 0.2393, | |
| "step": 20580 | |
| }, | |
| { | |
| "epoch": 100.54911531421598, | |
| "grad_norm": 3.6804075241088867, | |
| "learning_rate": 6.557377049180327e-05, | |
| "loss": 0.2266, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 100.64673581452105, | |
| "grad_norm": 4.8830156326293945, | |
| "learning_rate": 6.544262295081967e-05, | |
| "loss": 0.19, | |
| "step": 20620 | |
| }, | |
| { | |
| "epoch": 100.74435631482612, | |
| "grad_norm": 4.2519731521606445, | |
| "learning_rate": 6.531147540983606e-05, | |
| "loss": 0.2416, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 100.84197681513118, | |
| "grad_norm": 3.413914203643799, | |
| "learning_rate": 6.518032786885247e-05, | |
| "loss": 0.2026, | |
| "step": 20660 | |
| }, | |
| { | |
| "epoch": 100.93959731543625, | |
| "grad_norm": 3.642609119415283, | |
| "learning_rate": 6.504918032786886e-05, | |
| "loss": 0.2227, | |
| "step": 20680 | |
| }, | |
| { | |
| "epoch": 101.0372178157413, | |
| "grad_norm": 4.770230770111084, | |
| "learning_rate": 6.491803278688526e-05, | |
| "loss": 0.2277, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 101.13483831604637, | |
| "grad_norm": 4.115867614746094, | |
| "learning_rate": 6.478688524590165e-05, | |
| "loss": 0.205, | |
| "step": 20720 | |
| }, | |
| { | |
| "epoch": 101.23245881635144, | |
| "grad_norm": 4.3617048263549805, | |
| "learning_rate": 6.465573770491804e-05, | |
| "loss": 0.2048, | |
| "step": 20740 | |
| }, | |
| { | |
| "epoch": 101.3300793166565, | |
| "grad_norm": 3.818500518798828, | |
| "learning_rate": 6.452459016393443e-05, | |
| "loss": 0.2245, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 101.42769981696156, | |
| "grad_norm": 3.3728935718536377, | |
| "learning_rate": 6.439344262295083e-05, | |
| "loss": 0.2342, | |
| "step": 20780 | |
| }, | |
| { | |
| "epoch": 101.52532031726662, | |
| "grad_norm": 3.2103302478790283, | |
| "learning_rate": 6.426229508196722e-05, | |
| "loss": 0.2024, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 101.62294081757169, | |
| "grad_norm": 3.5993459224700928, | |
| "learning_rate": 6.413114754098361e-05, | |
| "loss": 0.2272, | |
| "step": 20820 | |
| }, | |
| { | |
| "epoch": 101.72056131787676, | |
| "grad_norm": 3.478533983230591, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 0.236, | |
| "step": 20840 | |
| }, | |
| { | |
| "epoch": 101.81818181818181, | |
| "grad_norm": 4.245722770690918, | |
| "learning_rate": 6.386885245901639e-05, | |
| "loss": 0.2246, | |
| "step": 20860 | |
| }, | |
| { | |
| "epoch": 101.91580231848688, | |
| "grad_norm": 3.5051097869873047, | |
| "learning_rate": 6.373770491803279e-05, | |
| "loss": 0.2194, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 102.01342281879195, | |
| "grad_norm": 2.882301092147827, | |
| "learning_rate": 6.360655737704918e-05, | |
| "loss": 0.2196, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 102.111043319097, | |
| "grad_norm": 3.080702066421509, | |
| "learning_rate": 6.347540983606558e-05, | |
| "loss": 0.2109, | |
| "step": 20920 | |
| }, | |
| { | |
| "epoch": 102.20866381940208, | |
| "grad_norm": 2.9892094135284424, | |
| "learning_rate": 6.334426229508197e-05, | |
| "loss": 0.2086, | |
| "step": 20940 | |
| }, | |
| { | |
| "epoch": 102.30628431970713, | |
| "grad_norm": 3.401524782180786, | |
| "learning_rate": 6.321311475409837e-05, | |
| "loss": 0.202, | |
| "step": 20960 | |
| }, | |
| { | |
| "epoch": 102.4039048200122, | |
| "grad_norm": 3.0444400310516357, | |
| "learning_rate": 6.308196721311475e-05, | |
| "loss": 0.2071, | |
| "step": 20980 | |
| }, | |
| { | |
| "epoch": 102.50152532031727, | |
| "grad_norm": 3.028918743133545, | |
| "learning_rate": 6.295081967213115e-05, | |
| "loss": 0.226, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 102.59914582062233, | |
| "grad_norm": 3.2344133853912354, | |
| "learning_rate": 6.281967213114754e-05, | |
| "loss": 0.2417, | |
| "step": 21020 | |
| }, | |
| { | |
| "epoch": 102.6967663209274, | |
| "grad_norm": 3.1719000339508057, | |
| "learning_rate": 6.268852459016393e-05, | |
| "loss": 0.2363, | |
| "step": 21040 | |
| }, | |
| { | |
| "epoch": 102.79438682123246, | |
| "grad_norm": 3.474695920944214, | |
| "learning_rate": 6.255737704918033e-05, | |
| "loss": 0.2278, | |
| "step": 21060 | |
| }, | |
| { | |
| "epoch": 102.89200732153752, | |
| "grad_norm": 4.550293445587158, | |
| "learning_rate": 6.242622950819672e-05, | |
| "loss": 0.2087, | |
| "step": 21080 | |
| }, | |
| { | |
| "epoch": 102.98962782184259, | |
| "grad_norm": 3.7696001529693604, | |
| "learning_rate": 6.229508196721313e-05, | |
| "loss": 0.2305, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 103.08724832214764, | |
| "grad_norm": 3.388397216796875, | |
| "learning_rate": 6.21639344262295e-05, | |
| "loss": 0.2133, | |
| "step": 21120 | |
| }, | |
| { | |
| "epoch": 103.18486882245271, | |
| "grad_norm": 4.243392467498779, | |
| "learning_rate": 6.20327868852459e-05, | |
| "loss": 0.213, | |
| "step": 21140 | |
| }, | |
| { | |
| "epoch": 103.28248932275778, | |
| "grad_norm": 2.646786689758301, | |
| "learning_rate": 6.19016393442623e-05, | |
| "loss": 0.2132, | |
| "step": 21160 | |
| }, | |
| { | |
| "epoch": 103.38010982306284, | |
| "grad_norm": 3.157848596572876, | |
| "learning_rate": 6.17704918032787e-05, | |
| "loss": 0.1941, | |
| "step": 21180 | |
| }, | |
| { | |
| "epoch": 103.47773032336791, | |
| "grad_norm": 3.383357286453247, | |
| "learning_rate": 6.163934426229509e-05, | |
| "loss": 0.2211, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 103.57535082367298, | |
| "grad_norm": 2.940901279449463, | |
| "learning_rate": 6.150819672131148e-05, | |
| "loss": 0.1934, | |
| "step": 21220 | |
| }, | |
| { | |
| "epoch": 103.67297132397803, | |
| "grad_norm": 4.2973103523254395, | |
| "learning_rate": 6.137704918032787e-05, | |
| "loss": 0.2516, | |
| "step": 21240 | |
| }, | |
| { | |
| "epoch": 103.7705918242831, | |
| "grad_norm": 3.586219310760498, | |
| "learning_rate": 6.124590163934426e-05, | |
| "loss": 0.262, | |
| "step": 21260 | |
| }, | |
| { | |
| "epoch": 103.86821232458816, | |
| "grad_norm": 3.7726240158081055, | |
| "learning_rate": 6.111475409836066e-05, | |
| "loss": 0.1876, | |
| "step": 21280 | |
| }, | |
| { | |
| "epoch": 103.96583282489323, | |
| "grad_norm": 3.5038113594055176, | |
| "learning_rate": 6.098360655737705e-05, | |
| "loss": 0.2156, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 104.0634533251983, | |
| "grad_norm": 3.6055893898010254, | |
| "learning_rate": 6.085245901639345e-05, | |
| "loss": 0.2386, | |
| "step": 21320 | |
| }, | |
| { | |
| "epoch": 104.16107382550335, | |
| "grad_norm": 4.037646770477295, | |
| "learning_rate": 6.072131147540984e-05, | |
| "loss": 0.229, | |
| "step": 21340 | |
| }, | |
| { | |
| "epoch": 104.25869432580842, | |
| "grad_norm": 3.209284543991089, | |
| "learning_rate": 6.0590163934426236e-05, | |
| "loss": 0.2204, | |
| "step": 21360 | |
| }, | |
| { | |
| "epoch": 104.35631482611349, | |
| "grad_norm": 3.388456106185913, | |
| "learning_rate": 6.0459016393442625e-05, | |
| "loss": 0.2155, | |
| "step": 21380 | |
| }, | |
| { | |
| "epoch": 104.45393532641855, | |
| "grad_norm": 2.9914731979370117, | |
| "learning_rate": 6.032786885245902e-05, | |
| "loss": 0.2205, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 104.55155582672361, | |
| "grad_norm": 3.4753572940826416, | |
| "learning_rate": 6.019672131147541e-05, | |
| "loss": 0.226, | |
| "step": 21420 | |
| }, | |
| { | |
| "epoch": 104.64917632702867, | |
| "grad_norm": 3.400149345397949, | |
| "learning_rate": 6.00655737704918e-05, | |
| "loss": 0.2117, | |
| "step": 21440 | |
| }, | |
| { | |
| "epoch": 104.74679682733374, | |
| "grad_norm": 3.2313406467437744, | |
| "learning_rate": 5.99344262295082e-05, | |
| "loss": 0.216, | |
| "step": 21460 | |
| }, | |
| { | |
| "epoch": 104.84441732763881, | |
| "grad_norm": 4.102112770080566, | |
| "learning_rate": 5.9803278688524586e-05, | |
| "loss": 0.1962, | |
| "step": 21480 | |
| }, | |
| { | |
| "epoch": 104.94203782794386, | |
| "grad_norm": 3.6872830390930176, | |
| "learning_rate": 5.967213114754099e-05, | |
| "loss": 0.2212, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 105.03965832824893, | |
| "grad_norm": 3.1488096714019775, | |
| "learning_rate": 5.954098360655738e-05, | |
| "loss": 0.1919, | |
| "step": 21520 | |
| }, | |
| { | |
| "epoch": 105.137278828554, | |
| "grad_norm": 3.4388973712921143, | |
| "learning_rate": 5.9409836065573774e-05, | |
| "loss": 0.247, | |
| "step": 21540 | |
| }, | |
| { | |
| "epoch": 105.23489932885906, | |
| "grad_norm": 3.776465892791748, | |
| "learning_rate": 5.927868852459016e-05, | |
| "loss": 0.1856, | |
| "step": 21560 | |
| }, | |
| { | |
| "epoch": 105.33251982916413, | |
| "grad_norm": 3.552684783935547, | |
| "learning_rate": 5.9147540983606566e-05, | |
| "loss": 0.2042, | |
| "step": 21580 | |
| }, | |
| { | |
| "epoch": 105.43014032946918, | |
| "grad_norm": 2.8810436725616455, | |
| "learning_rate": 5.9016393442622956e-05, | |
| "loss": 0.2353, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 105.52776082977425, | |
| "grad_norm": 3.2408454418182373, | |
| "learning_rate": 5.888524590163935e-05, | |
| "loss": 0.2148, | |
| "step": 21620 | |
| }, | |
| { | |
| "epoch": 105.62538133007932, | |
| "grad_norm": 3.5531325340270996, | |
| "learning_rate": 5.875409836065574e-05, | |
| "loss": 0.2106, | |
| "step": 21640 | |
| }, | |
| { | |
| "epoch": 105.72300183038438, | |
| "grad_norm": 4.188174247741699, | |
| "learning_rate": 5.862295081967213e-05, | |
| "loss": 0.2211, | |
| "step": 21660 | |
| }, | |
| { | |
| "epoch": 105.82062233068945, | |
| "grad_norm": 4.232003688812256, | |
| "learning_rate": 5.849180327868853e-05, | |
| "loss": 0.2139, | |
| "step": 21680 | |
| }, | |
| { | |
| "epoch": 105.91824283099452, | |
| "grad_norm": 3.7038660049438477, | |
| "learning_rate": 5.8360655737704916e-05, | |
| "loss": 0.2048, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 106.01586333129957, | |
| "grad_norm": 2.9898719787597656, | |
| "learning_rate": 5.822950819672132e-05, | |
| "loss": 0.2184, | |
| "step": 21720 | |
| }, | |
| { | |
| "epoch": 106.11348383160464, | |
| "grad_norm": 3.482365846633911, | |
| "learning_rate": 5.80983606557377e-05, | |
| "loss": 0.2171, | |
| "step": 21740 | |
| }, | |
| { | |
| "epoch": 106.2111043319097, | |
| "grad_norm": 3.3595712184906006, | |
| "learning_rate": 5.7967213114754104e-05, | |
| "loss": 0.2041, | |
| "step": 21760 | |
| }, | |
| { | |
| "epoch": 106.30872483221476, | |
| "grad_norm": 3.7470903396606445, | |
| "learning_rate": 5.7836065573770494e-05, | |
| "loss": 0.1946, | |
| "step": 21780 | |
| }, | |
| { | |
| "epoch": 106.40634533251983, | |
| "grad_norm": 3.418549060821533, | |
| "learning_rate": 5.770491803278689e-05, | |
| "loss": 0.2013, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 106.50396583282489, | |
| "grad_norm": 3.115424394607544, | |
| "learning_rate": 5.757377049180328e-05, | |
| "loss": 0.2064, | |
| "step": 21820 | |
| }, | |
| { | |
| "epoch": 106.60158633312996, | |
| "grad_norm": 3.2718710899353027, | |
| "learning_rate": 5.744262295081968e-05, | |
| "loss": 0.2266, | |
| "step": 21840 | |
| }, | |
| { | |
| "epoch": 106.69920683343503, | |
| "grad_norm": 2.9478771686553955, | |
| "learning_rate": 5.731147540983607e-05, | |
| "loss": 0.2321, | |
| "step": 21860 | |
| }, | |
| { | |
| "epoch": 106.79682733374008, | |
| "grad_norm": 3.479456663131714, | |
| "learning_rate": 5.7180327868852454e-05, | |
| "loss": 0.2317, | |
| "step": 21880 | |
| }, | |
| { | |
| "epoch": 106.89444783404515, | |
| "grad_norm": 3.934882879257202, | |
| "learning_rate": 5.704918032786886e-05, | |
| "loss": 0.2074, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 106.99206833435021, | |
| "grad_norm": 3.40720272064209, | |
| "learning_rate": 5.6918032786885246e-05, | |
| "loss": 0.2123, | |
| "step": 21920 | |
| }, | |
| { | |
| "epoch": 107.08968883465528, | |
| "grad_norm": 2.825427770614624, | |
| "learning_rate": 5.678688524590164e-05, | |
| "loss": 0.1897, | |
| "step": 21940 | |
| }, | |
| { | |
| "epoch": 107.18730933496035, | |
| "grad_norm": 3.6434717178344727, | |
| "learning_rate": 5.665573770491803e-05, | |
| "loss": 0.207, | |
| "step": 21960 | |
| }, | |
| { | |
| "epoch": 107.2849298352654, | |
| "grad_norm": 4.119269847869873, | |
| "learning_rate": 5.6524590163934435e-05, | |
| "loss": 0.2086, | |
| "step": 21980 | |
| }, | |
| { | |
| "epoch": 107.38255033557047, | |
| "grad_norm": 2.835963487625122, | |
| "learning_rate": 5.639344262295082e-05, | |
| "loss": 0.1942, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 107.48017083587554, | |
| "grad_norm": 3.175858736038208, | |
| "learning_rate": 5.626229508196722e-05, | |
| "loss": 0.2089, | |
| "step": 22020 | |
| }, | |
| { | |
| "epoch": 107.5777913361806, | |
| "grad_norm": 3.6265509128570557, | |
| "learning_rate": 5.613114754098361e-05, | |
| "loss": 0.2164, | |
| "step": 22040 | |
| }, | |
| { | |
| "epoch": 107.67541183648567, | |
| "grad_norm": 2.90639591217041, | |
| "learning_rate": 5.6000000000000006e-05, | |
| "loss": 0.2121, | |
| "step": 22060 | |
| }, | |
| { | |
| "epoch": 107.77303233679072, | |
| "grad_norm": 4.155264377593994, | |
| "learning_rate": 5.5868852459016395e-05, | |
| "loss": 0.219, | |
| "step": 22080 | |
| }, | |
| { | |
| "epoch": 107.87065283709579, | |
| "grad_norm": 3.730433940887451, | |
| "learning_rate": 5.5737704918032785e-05, | |
| "loss": 0.2369, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 107.96827333740086, | |
| "grad_norm": 2.8690969944000244, | |
| "learning_rate": 5.560655737704919e-05, | |
| "loss": 0.2189, | |
| "step": 22120 | |
| }, | |
| { | |
| "epoch": 108.06589383770591, | |
| "grad_norm": 3.061427116394043, | |
| "learning_rate": 5.547540983606557e-05, | |
| "loss": 0.2203, | |
| "step": 22140 | |
| }, | |
| { | |
| "epoch": 108.16351433801098, | |
| "grad_norm": 3.6155471801757812, | |
| "learning_rate": 5.534426229508197e-05, | |
| "loss": 0.2043, | |
| "step": 22160 | |
| }, | |
| { | |
| "epoch": 108.26113483831605, | |
| "grad_norm": 3.1074283123016357, | |
| "learning_rate": 5.521311475409836e-05, | |
| "loss": 0.2095, | |
| "step": 22180 | |
| }, | |
| { | |
| "epoch": 108.35875533862111, | |
| "grad_norm": 3.6168534755706787, | |
| "learning_rate": 5.508196721311476e-05, | |
| "loss": 0.2339, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 108.45637583892618, | |
| "grad_norm": 2.9254798889160156, | |
| "learning_rate": 5.495081967213115e-05, | |
| "loss": 0.214, | |
| "step": 22220 | |
| }, | |
| { | |
| "epoch": 108.55399633923123, | |
| "grad_norm": 3.2898197174072266, | |
| "learning_rate": 5.481967213114755e-05, | |
| "loss": 0.1981, | |
| "step": 22240 | |
| }, | |
| { | |
| "epoch": 108.6516168395363, | |
| "grad_norm": 3.0724892616271973, | |
| "learning_rate": 5.4688524590163933e-05, | |
| "loss": 0.1997, | |
| "step": 22260 | |
| }, | |
| { | |
| "epoch": 108.74923733984137, | |
| "grad_norm": 3.3344459533691406, | |
| "learning_rate": 5.4557377049180336e-05, | |
| "loss": 0.2243, | |
| "step": 22280 | |
| }, | |
| { | |
| "epoch": 108.84685784014643, | |
| "grad_norm": 4.20386266708374, | |
| "learning_rate": 5.4426229508196726e-05, | |
| "loss": 0.1973, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 108.9444783404515, | |
| "grad_norm": 3.0647547245025635, | |
| "learning_rate": 5.4295081967213115e-05, | |
| "loss": 0.1984, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 109.04209884075657, | |
| "grad_norm": 3.2792584896087646, | |
| "learning_rate": 5.416393442622951e-05, | |
| "loss": 0.2209, | |
| "step": 22340 | |
| }, | |
| { | |
| "epoch": 109.13971934106162, | |
| "grad_norm": 2.900493860244751, | |
| "learning_rate": 5.40327868852459e-05, | |
| "loss": 0.2123, | |
| "step": 22360 | |
| }, | |
| { | |
| "epoch": 109.23733984136669, | |
| "grad_norm": 2.754514217376709, | |
| "learning_rate": 5.3901639344262304e-05, | |
| "loss": 0.2049, | |
| "step": 22380 | |
| }, | |
| { | |
| "epoch": 109.33496034167175, | |
| "grad_norm": 2.955946445465088, | |
| "learning_rate": 5.3770491803278686e-05, | |
| "loss": 0.2164, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 109.43258084197682, | |
| "grad_norm": 3.0447773933410645, | |
| "learning_rate": 5.363934426229509e-05, | |
| "loss": 0.1903, | |
| "step": 22420 | |
| }, | |
| { | |
| "epoch": 109.53020134228188, | |
| "grad_norm": 2.8788065910339355, | |
| "learning_rate": 5.350819672131148e-05, | |
| "loss": 0.2018, | |
| "step": 22440 | |
| }, | |
| { | |
| "epoch": 109.62782184258694, | |
| "grad_norm": 3.766073703765869, | |
| "learning_rate": 5.3377049180327875e-05, | |
| "loss": 0.2092, | |
| "step": 22460 | |
| }, | |
| { | |
| "epoch": 109.72544234289201, | |
| "grad_norm": 4.014832973480225, | |
| "learning_rate": 5.3245901639344264e-05, | |
| "loss": 0.2138, | |
| "step": 22480 | |
| }, | |
| { | |
| "epoch": 109.82306284319706, | |
| "grad_norm": 2.987813949584961, | |
| "learning_rate": 5.311475409836065e-05, | |
| "loss": 0.2253, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 109.92068334350213, | |
| "grad_norm": 2.980419158935547, | |
| "learning_rate": 5.298360655737705e-05, | |
| "loss": 0.2159, | |
| "step": 22520 | |
| }, | |
| { | |
| "epoch": 110.0183038438072, | |
| "grad_norm": 3.1005942821502686, | |
| "learning_rate": 5.285245901639344e-05, | |
| "loss": 0.1907, | |
| "step": 22540 | |
| }, | |
| { | |
| "epoch": 110.11592434411226, | |
| "grad_norm": 3.2539725303649902, | |
| "learning_rate": 5.272131147540984e-05, | |
| "loss": 0.2023, | |
| "step": 22560 | |
| }, | |
| { | |
| "epoch": 110.21354484441733, | |
| "grad_norm": 3.4975571632385254, | |
| "learning_rate": 5.259016393442623e-05, | |
| "loss": 0.2101, | |
| "step": 22580 | |
| }, | |
| { | |
| "epoch": 110.3111653447224, | |
| "grad_norm": 4.542675018310547, | |
| "learning_rate": 5.245901639344263e-05, | |
| "loss": 0.2071, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 110.40878584502745, | |
| "grad_norm": 2.5775978565216064, | |
| "learning_rate": 5.2327868852459017e-05, | |
| "loss": 0.2027, | |
| "step": 22620 | |
| }, | |
| { | |
| "epoch": 110.50640634533252, | |
| "grad_norm": 2.964486598968506, | |
| "learning_rate": 5.219672131147541e-05, | |
| "loss": 0.2281, | |
| "step": 22640 | |
| }, | |
| { | |
| "epoch": 110.60402684563758, | |
| "grad_norm": 3.883513927459717, | |
| "learning_rate": 5.20655737704918e-05, | |
| "loss": 0.2083, | |
| "step": 22660 | |
| }, | |
| { | |
| "epoch": 110.70164734594265, | |
| "grad_norm": 3.696744441986084, | |
| "learning_rate": 5.1934426229508205e-05, | |
| "loss": 0.2044, | |
| "step": 22680 | |
| }, | |
| { | |
| "epoch": 110.79926784624772, | |
| "grad_norm": 3.104335308074951, | |
| "learning_rate": 5.1803278688524594e-05, | |
| "loss": 0.216, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 110.89688834655277, | |
| "grad_norm": 3.6240875720977783, | |
| "learning_rate": 5.1672131147540984e-05, | |
| "loss": 0.2086, | |
| "step": 22720 | |
| }, | |
| { | |
| "epoch": 110.99450884685784, | |
| "grad_norm": 4.600063323974609, | |
| "learning_rate": 5.154098360655738e-05, | |
| "loss": 0.1983, | |
| "step": 22740 | |
| }, | |
| { | |
| "epoch": 111.09212934716291, | |
| "grad_norm": 4.165120601654053, | |
| "learning_rate": 5.140983606557377e-05, | |
| "loss": 0.207, | |
| "step": 22760 | |
| }, | |
| { | |
| "epoch": 111.18974984746797, | |
| "grad_norm": 3.1178033351898193, | |
| "learning_rate": 5.1278688524590165e-05, | |
| "loss": 0.1836, | |
| "step": 22780 | |
| }, | |
| { | |
| "epoch": 111.28737034777303, | |
| "grad_norm": 3.8547523021698, | |
| "learning_rate": 5.1147540983606555e-05, | |
| "loss": 0.213, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 111.38499084807809, | |
| "grad_norm": 3.626835346221924, | |
| "learning_rate": 5.101639344262296e-05, | |
| "loss": 0.1901, | |
| "step": 22820 | |
| }, | |
| { | |
| "epoch": 111.48261134838316, | |
| "grad_norm": 3.89408540725708, | |
| "learning_rate": 5.088524590163935e-05, | |
| "loss": 0.2151, | |
| "step": 22840 | |
| }, | |
| { | |
| "epoch": 111.58023184868823, | |
| "grad_norm": 3.3434460163116455, | |
| "learning_rate": 5.075409836065574e-05, | |
| "loss": 0.2165, | |
| "step": 22860 | |
| }, | |
| { | |
| "epoch": 111.67785234899328, | |
| "grad_norm": 3.1831305027008057, | |
| "learning_rate": 5.062295081967213e-05, | |
| "loss": 0.1911, | |
| "step": 22880 | |
| }, | |
| { | |
| "epoch": 111.77547284929835, | |
| "grad_norm": 3.6153972148895264, | |
| "learning_rate": 5.049180327868853e-05, | |
| "loss": 0.2076, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 111.87309334960342, | |
| "grad_norm": 2.87998628616333, | |
| "learning_rate": 5.036065573770492e-05, | |
| "loss": 0.2121, | |
| "step": 22920 | |
| }, | |
| { | |
| "epoch": 111.97071384990848, | |
| "grad_norm": 2.9351823329925537, | |
| "learning_rate": 5.022950819672131e-05, | |
| "loss": 0.2126, | |
| "step": 22940 | |
| }, | |
| { | |
| "epoch": 112.06833435021355, | |
| "grad_norm": 3.098568916320801, | |
| "learning_rate": 5.009836065573771e-05, | |
| "loss": 0.2169, | |
| "step": 22960 | |
| }, | |
| { | |
| "epoch": 112.1659548505186, | |
| "grad_norm": 4.321131229400635, | |
| "learning_rate": 4.99672131147541e-05, | |
| "loss": 0.2028, | |
| "step": 22980 | |
| }, | |
| { | |
| "epoch": 112.26357535082367, | |
| "grad_norm": 2.1096384525299072, | |
| "learning_rate": 4.9836065573770496e-05, | |
| "loss": 0.2041, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 112.36119585112874, | |
| "grad_norm": 3.537277936935425, | |
| "learning_rate": 4.970491803278689e-05, | |
| "loss": 0.1988, | |
| "step": 23020 | |
| }, | |
| { | |
| "epoch": 112.4588163514338, | |
| "grad_norm": 4.58275842666626, | |
| "learning_rate": 4.957377049180328e-05, | |
| "loss": 0.1855, | |
| "step": 23040 | |
| }, | |
| { | |
| "epoch": 112.55643685173887, | |
| "grad_norm": 3.290548086166382, | |
| "learning_rate": 4.944262295081967e-05, | |
| "loss": 0.2041, | |
| "step": 23060 | |
| }, | |
| { | |
| "epoch": 112.65405735204394, | |
| "grad_norm": 4.11199951171875, | |
| "learning_rate": 4.931147540983607e-05, | |
| "loss": 0.2062, | |
| "step": 23080 | |
| }, | |
| { | |
| "epoch": 112.75167785234899, | |
| "grad_norm": 4.122567176818848, | |
| "learning_rate": 4.918032786885246e-05, | |
| "loss": 0.2003, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 112.84929835265406, | |
| "grad_norm": 2.775681495666504, | |
| "learning_rate": 4.904918032786885e-05, | |
| "loss": 0.2201, | |
| "step": 23120 | |
| }, | |
| { | |
| "epoch": 112.94691885295912, | |
| "grad_norm": 2.8952252864837646, | |
| "learning_rate": 4.891803278688525e-05, | |
| "loss": 0.2068, | |
| "step": 23140 | |
| }, | |
| { | |
| "epoch": 113.04453935326418, | |
| "grad_norm": 2.8353431224823, | |
| "learning_rate": 4.8786885245901645e-05, | |
| "loss": 0.2087, | |
| "step": 23160 | |
| }, | |
| { | |
| "epoch": 113.14215985356925, | |
| "grad_norm": 4.125601768493652, | |
| "learning_rate": 4.8655737704918034e-05, | |
| "loss": 0.1973, | |
| "step": 23180 | |
| }, | |
| { | |
| "epoch": 113.23978035387431, | |
| "grad_norm": 3.5090670585632324, | |
| "learning_rate": 4.852459016393443e-05, | |
| "loss": 0.1847, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 113.33740085417938, | |
| "grad_norm": 2.688890218734741, | |
| "learning_rate": 4.8393442622950826e-05, | |
| "loss": 0.1842, | |
| "step": 23220 | |
| }, | |
| { | |
| "epoch": 113.43502135448445, | |
| "grad_norm": 4.937670707702637, | |
| "learning_rate": 4.8262295081967216e-05, | |
| "loss": 0.214, | |
| "step": 23240 | |
| }, | |
| { | |
| "epoch": 113.5326418547895, | |
| "grad_norm": 4.070579528808594, | |
| "learning_rate": 4.8131147540983605e-05, | |
| "loss": 0.2203, | |
| "step": 23260 | |
| }, | |
| { | |
| "epoch": 113.63026235509457, | |
| "grad_norm": 3.052457809448242, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.2116, | |
| "step": 23280 | |
| }, | |
| { | |
| "epoch": 113.72788285539963, | |
| "grad_norm": 3.2423150539398193, | |
| "learning_rate": 4.78688524590164e-05, | |
| "loss": 0.192, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 113.8255033557047, | |
| "grad_norm": 2.9941837787628174, | |
| "learning_rate": 4.773770491803279e-05, | |
| "loss": 0.1916, | |
| "step": 23320 | |
| }, | |
| { | |
| "epoch": 113.92312385600977, | |
| "grad_norm": 3.3031139373779297, | |
| "learning_rate": 4.760655737704918e-05, | |
| "loss": 0.2261, | |
| "step": 23340 | |
| }, | |
| { | |
| "epoch": 114.02074435631482, | |
| "grad_norm": 3.531646490097046, | |
| "learning_rate": 4.747540983606558e-05, | |
| "loss": 0.2186, | |
| "step": 23360 | |
| }, | |
| { | |
| "epoch": 114.11836485661989, | |
| "grad_norm": 2.753261089324951, | |
| "learning_rate": 4.734426229508197e-05, | |
| "loss": 0.1979, | |
| "step": 23380 | |
| }, | |
| { | |
| "epoch": 114.21598535692496, | |
| "grad_norm": 3.2900078296661377, | |
| "learning_rate": 4.7213114754098365e-05, | |
| "loss": 0.1892, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 114.31360585723002, | |
| "grad_norm": 3.6558027267456055, | |
| "learning_rate": 4.708196721311476e-05, | |
| "loss": 0.2202, | |
| "step": 23420 | |
| }, | |
| { | |
| "epoch": 114.41122635753509, | |
| "grad_norm": 2.9026436805725098, | |
| "learning_rate": 4.695081967213115e-05, | |
| "loss": 0.2193, | |
| "step": 23440 | |
| }, | |
| { | |
| "epoch": 114.50884685784014, | |
| "grad_norm": 3.0958666801452637, | |
| "learning_rate": 4.681967213114754e-05, | |
| "loss": 0.1861, | |
| "step": 23460 | |
| }, | |
| { | |
| "epoch": 114.60646735814521, | |
| "grad_norm": 3.4162192344665527, | |
| "learning_rate": 4.6688524590163936e-05, | |
| "loss": 0.1813, | |
| "step": 23480 | |
| }, | |
| { | |
| "epoch": 114.70408785845028, | |
| "grad_norm": 3.4320363998413086, | |
| "learning_rate": 4.655737704918033e-05, | |
| "loss": 0.1849, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 114.80170835875533, | |
| "grad_norm": 3.931405544281006, | |
| "learning_rate": 4.642622950819672e-05, | |
| "loss": 0.2091, | |
| "step": 23520 | |
| }, | |
| { | |
| "epoch": 114.8993288590604, | |
| "grad_norm": 2.463747024536133, | |
| "learning_rate": 4.629508196721312e-05, | |
| "loss": 0.2013, | |
| "step": 23540 | |
| }, | |
| { | |
| "epoch": 114.99694935936547, | |
| "grad_norm": 3.4332265853881836, | |
| "learning_rate": 4.616393442622951e-05, | |
| "loss": 0.2224, | |
| "step": 23560 | |
| }, | |
| { | |
| "epoch": 115.09456985967053, | |
| "grad_norm": 4.516382217407227, | |
| "learning_rate": 4.60327868852459e-05, | |
| "loss": 0.191, | |
| "step": 23580 | |
| }, | |
| { | |
| "epoch": 115.1921903599756, | |
| "grad_norm": 3.7761762142181396, | |
| "learning_rate": 4.59016393442623e-05, | |
| "loss": 0.1972, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 115.28981086028065, | |
| "grad_norm": 3.425079584121704, | |
| "learning_rate": 4.5770491803278695e-05, | |
| "loss": 0.2053, | |
| "step": 23620 | |
| }, | |
| { | |
| "epoch": 115.38743136058572, | |
| "grad_norm": 3.098958969116211, | |
| "learning_rate": 4.5639344262295084e-05, | |
| "loss": 0.1994, | |
| "step": 23640 | |
| }, | |
| { | |
| "epoch": 115.48505186089079, | |
| "grad_norm": 4.242363452911377, | |
| "learning_rate": 4.550819672131148e-05, | |
| "loss": 0.2062, | |
| "step": 23660 | |
| }, | |
| { | |
| "epoch": 115.58267236119585, | |
| "grad_norm": 3.9641380310058594, | |
| "learning_rate": 4.537704918032787e-05, | |
| "loss": 0.1892, | |
| "step": 23680 | |
| }, | |
| { | |
| "epoch": 115.68029286150092, | |
| "grad_norm": 3.817915678024292, | |
| "learning_rate": 4.524590163934426e-05, | |
| "loss": 0.2076, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 115.77791336180599, | |
| "grad_norm": 4.9167680740356445, | |
| "learning_rate": 4.5114754098360655e-05, | |
| "loss": 0.1991, | |
| "step": 23720 | |
| }, | |
| { | |
| "epoch": 115.87553386211104, | |
| "grad_norm": 4.05020809173584, | |
| "learning_rate": 4.498360655737705e-05, | |
| "loss": 0.2154, | |
| "step": 23740 | |
| }, | |
| { | |
| "epoch": 115.97315436241611, | |
| "grad_norm": 5.213686466217041, | |
| "learning_rate": 4.485245901639345e-05, | |
| "loss": 0.1893, | |
| "step": 23760 | |
| }, | |
| { | |
| "epoch": 116.07077486272117, | |
| "grad_norm": 2.9788458347320557, | |
| "learning_rate": 4.472131147540984e-05, | |
| "loss": 0.2015, | |
| "step": 23780 | |
| }, | |
| { | |
| "epoch": 116.16839536302624, | |
| "grad_norm": 3.5112173557281494, | |
| "learning_rate": 4.459016393442623e-05, | |
| "loss": 0.2058, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 116.2660158633313, | |
| "grad_norm": 3.3048148155212402, | |
| "learning_rate": 4.445901639344263e-05, | |
| "loss": 0.2107, | |
| "step": 23820 | |
| }, | |
| { | |
| "epoch": 116.36363636363636, | |
| "grad_norm": 3.895615339279175, | |
| "learning_rate": 4.432786885245902e-05, | |
| "loss": 0.1744, | |
| "step": 23840 | |
| }, | |
| { | |
| "epoch": 116.46125686394143, | |
| "grad_norm": 3.7658941745758057, | |
| "learning_rate": 4.4196721311475415e-05, | |
| "loss": 0.1935, | |
| "step": 23860 | |
| }, | |
| { | |
| "epoch": 116.5588773642465, | |
| "grad_norm": 3.4818522930145264, | |
| "learning_rate": 4.406557377049181e-05, | |
| "loss": 0.2028, | |
| "step": 23880 | |
| }, | |
| { | |
| "epoch": 116.65649786455155, | |
| "grad_norm": 2.971785068511963, | |
| "learning_rate": 4.3934426229508194e-05, | |
| "loss": 0.1993, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 116.75411836485662, | |
| "grad_norm": 4.34529447555542, | |
| "learning_rate": 4.380327868852459e-05, | |
| "loss": 0.1887, | |
| "step": 23920 | |
| }, | |
| { | |
| "epoch": 116.85173886516168, | |
| "grad_norm": 3.1781437397003174, | |
| "learning_rate": 4.3672131147540986e-05, | |
| "loss": 0.2241, | |
| "step": 23940 | |
| }, | |
| { | |
| "epoch": 116.94935936546675, | |
| "grad_norm": 3.8372061252593994, | |
| "learning_rate": 4.3540983606557375e-05, | |
| "loss": 0.2103, | |
| "step": 23960 | |
| }, | |
| { | |
| "epoch": 117.04697986577182, | |
| "grad_norm": 3.981776237487793, | |
| "learning_rate": 4.340983606557377e-05, | |
| "loss": 0.1859, | |
| "step": 23980 | |
| }, | |
| { | |
| "epoch": 117.14460036607687, | |
| "grad_norm": 4.0308685302734375, | |
| "learning_rate": 4.327868852459017e-05, | |
| "loss": 0.1841, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 117.24222086638194, | |
| "grad_norm": 3.2919681072235107, | |
| "learning_rate": 4.3147540983606564e-05, | |
| "loss": 0.1964, | |
| "step": 24020 | |
| }, | |
| { | |
| "epoch": 117.33984136668701, | |
| "grad_norm": 4.155374050140381, | |
| "learning_rate": 4.301639344262295e-05, | |
| "loss": 0.1896, | |
| "step": 24040 | |
| }, | |
| { | |
| "epoch": 117.43746186699207, | |
| "grad_norm": 3.5654046535491943, | |
| "learning_rate": 4.288524590163935e-05, | |
| "loss": 0.1913, | |
| "step": 24060 | |
| }, | |
| { | |
| "epoch": 117.53508236729714, | |
| "grad_norm": 2.805769681930542, | |
| "learning_rate": 4.2754098360655745e-05, | |
| "loss": 0.1995, | |
| "step": 24080 | |
| }, | |
| { | |
| "epoch": 117.63270286760219, | |
| "grad_norm": 2.990482807159424, | |
| "learning_rate": 4.262295081967213e-05, | |
| "loss": 0.2034, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 117.73032336790726, | |
| "grad_norm": 3.236433506011963, | |
| "learning_rate": 4.2491803278688524e-05, | |
| "loss": 0.1948, | |
| "step": 24120 | |
| }, | |
| { | |
| "epoch": 117.82794386821233, | |
| "grad_norm": 4.235846996307373, | |
| "learning_rate": 4.236065573770492e-05, | |
| "loss": 0.2014, | |
| "step": 24140 | |
| }, | |
| { | |
| "epoch": 117.92556436851739, | |
| "grad_norm": 3.238041400909424, | |
| "learning_rate": 4.222950819672131e-05, | |
| "loss": 0.2106, | |
| "step": 24160 | |
| }, | |
| { | |
| "epoch": 118.02318486882245, | |
| "grad_norm": 3.18979811668396, | |
| "learning_rate": 4.2098360655737706e-05, | |
| "loss": 0.1918, | |
| "step": 24180 | |
| }, | |
| { | |
| "epoch": 118.12080536912751, | |
| "grad_norm": 2.699910879135132, | |
| "learning_rate": 4.19672131147541e-05, | |
| "loss": 0.2121, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 118.21842586943258, | |
| "grad_norm": 3.6096503734588623, | |
| "learning_rate": 4.183606557377049e-05, | |
| "loss": 0.2165, | |
| "step": 24220 | |
| }, | |
| { | |
| "epoch": 118.31604636973765, | |
| "grad_norm": 3.384746789932251, | |
| "learning_rate": 4.170491803278689e-05, | |
| "loss": 0.1884, | |
| "step": 24240 | |
| }, | |
| { | |
| "epoch": 118.4136668700427, | |
| "grad_norm": 3.606003761291504, | |
| "learning_rate": 4.1573770491803283e-05, | |
| "loss": 0.1932, | |
| "step": 24260 | |
| }, | |
| { | |
| "epoch": 118.51128737034777, | |
| "grad_norm": 2.9768433570861816, | |
| "learning_rate": 4.144262295081967e-05, | |
| "loss": 0.1802, | |
| "step": 24280 | |
| }, | |
| { | |
| "epoch": 118.60890787065284, | |
| "grad_norm": 3.784608840942383, | |
| "learning_rate": 4.131147540983607e-05, | |
| "loss": 0.1919, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 118.7065283709579, | |
| "grad_norm": 3.2883870601654053, | |
| "learning_rate": 4.118032786885246e-05, | |
| "loss": 0.2015, | |
| "step": 24320 | |
| }, | |
| { | |
| "epoch": 118.80414887126297, | |
| "grad_norm": 4.078542232513428, | |
| "learning_rate": 4.1049180327868854e-05, | |
| "loss": 0.1994, | |
| "step": 24340 | |
| }, | |
| { | |
| "epoch": 118.90176937156802, | |
| "grad_norm": 3.7962749004364014, | |
| "learning_rate": 4.0918032786885244e-05, | |
| "loss": 0.1958, | |
| "step": 24360 | |
| }, | |
| { | |
| "epoch": 118.99938987187309, | |
| "grad_norm": 3.4484541416168213, | |
| "learning_rate": 4.078688524590164e-05, | |
| "loss": 0.1926, | |
| "step": 24380 | |
| }, | |
| { | |
| "epoch": 119.09701037217816, | |
| "grad_norm": 2.9271483421325684, | |
| "learning_rate": 4.0655737704918036e-05, | |
| "loss": 0.1818, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 119.19463087248322, | |
| "grad_norm": 5.304340839385986, | |
| "learning_rate": 4.0524590163934425e-05, | |
| "loss": 0.192, | |
| "step": 24420 | |
| }, | |
| { | |
| "epoch": 119.29225137278829, | |
| "grad_norm": 4.88720178604126, | |
| "learning_rate": 4.039344262295082e-05, | |
| "loss": 0.1975, | |
| "step": 24440 | |
| }, | |
| { | |
| "epoch": 119.38987187309336, | |
| "grad_norm": 3.7572522163391113, | |
| "learning_rate": 4.026229508196722e-05, | |
| "loss": 0.1856, | |
| "step": 24460 | |
| }, | |
| { | |
| "epoch": 119.48749237339841, | |
| "grad_norm": 3.3659627437591553, | |
| "learning_rate": 4.013114754098361e-05, | |
| "loss": 0.1961, | |
| "step": 24480 | |
| }, | |
| { | |
| "epoch": 119.58511287370348, | |
| "grad_norm": 3.1397533416748047, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1991, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 119.68273337400854, | |
| "grad_norm": 3.9070587158203125, | |
| "learning_rate": 3.98688524590164e-05, | |
| "loss": 0.1946, | |
| "step": 24520 | |
| }, | |
| { | |
| "epoch": 119.7803538743136, | |
| "grad_norm": 3.3265631198883057, | |
| "learning_rate": 3.973770491803279e-05, | |
| "loss": 0.194, | |
| "step": 24540 | |
| }, | |
| { | |
| "epoch": 119.87797437461867, | |
| "grad_norm": 3.5021114349365234, | |
| "learning_rate": 3.960655737704918e-05, | |
| "loss": 0.2084, | |
| "step": 24560 | |
| }, | |
| { | |
| "epoch": 119.97559487492373, | |
| "grad_norm": 3.7133572101593018, | |
| "learning_rate": 3.9475409836065574e-05, | |
| "loss": 0.2102, | |
| "step": 24580 | |
| }, | |
| { | |
| "epoch": 120.0732153752288, | |
| "grad_norm": 3.27691912651062, | |
| "learning_rate": 3.934426229508197e-05, | |
| "loss": 0.1864, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 120.17083587553387, | |
| "grad_norm": 4.74030065536499, | |
| "learning_rate": 3.921311475409836e-05, | |
| "loss": 0.1967, | |
| "step": 24620 | |
| }, | |
| { | |
| "epoch": 120.26845637583892, | |
| "grad_norm": 4.080429553985596, | |
| "learning_rate": 3.9081967213114756e-05, | |
| "loss": 0.1888, | |
| "step": 24640 | |
| }, | |
| { | |
| "epoch": 120.36607687614399, | |
| "grad_norm": 3.5443646907806396, | |
| "learning_rate": 3.895081967213115e-05, | |
| "loss": 0.199, | |
| "step": 24660 | |
| }, | |
| { | |
| "epoch": 120.46369737644905, | |
| "grad_norm": 4.0403923988342285, | |
| "learning_rate": 3.881967213114754e-05, | |
| "loss": 0.2002, | |
| "step": 24680 | |
| }, | |
| { | |
| "epoch": 120.56131787675412, | |
| "grad_norm": 4.144064426422119, | |
| "learning_rate": 3.868852459016394e-05, | |
| "loss": 0.193, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 120.65893837705919, | |
| "grad_norm": 5.478573799133301, | |
| "learning_rate": 3.8557377049180334e-05, | |
| "loss": 0.19, | |
| "step": 24720 | |
| }, | |
| { | |
| "epoch": 120.75655887736424, | |
| "grad_norm": 3.799931287765503, | |
| "learning_rate": 3.842622950819672e-05, | |
| "loss": 0.1876, | |
| "step": 24740 | |
| }, | |
| { | |
| "epoch": 120.85417937766931, | |
| "grad_norm": 3.1754050254821777, | |
| "learning_rate": 3.829508196721311e-05, | |
| "loss": 0.1881, | |
| "step": 24760 | |
| }, | |
| { | |
| "epoch": 120.95179987797438, | |
| "grad_norm": 3.4417614936828613, | |
| "learning_rate": 3.816393442622951e-05, | |
| "loss": 0.2023, | |
| "step": 24780 | |
| }, | |
| { | |
| "epoch": 121.04942037827944, | |
| "grad_norm": 2.689237594604492, | |
| "learning_rate": 3.8032786885245905e-05, | |
| "loss": 0.1918, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 121.1470408785845, | |
| "grad_norm": 2.6631858348846436, | |
| "learning_rate": 3.7901639344262294e-05, | |
| "loss": 0.1681, | |
| "step": 24820 | |
| }, | |
| { | |
| "epoch": 121.24466137888956, | |
| "grad_norm": 4.315155982971191, | |
| "learning_rate": 3.777049180327869e-05, | |
| "loss": 0.2037, | |
| "step": 24840 | |
| }, | |
| { | |
| "epoch": 121.34228187919463, | |
| "grad_norm": 3.0183377265930176, | |
| "learning_rate": 3.7639344262295086e-05, | |
| "loss": 0.1901, | |
| "step": 24860 | |
| }, | |
| { | |
| "epoch": 121.4399023794997, | |
| "grad_norm": 2.389106035232544, | |
| "learning_rate": 3.7508196721311476e-05, | |
| "loss": 0.2162, | |
| "step": 24880 | |
| }, | |
| { | |
| "epoch": 121.53752287980475, | |
| "grad_norm": 3.5406622886657715, | |
| "learning_rate": 3.737704918032787e-05, | |
| "loss": 0.1887, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 121.63514338010982, | |
| "grad_norm": 3.272151231765747, | |
| "learning_rate": 3.724590163934427e-05, | |
| "loss": 0.2156, | |
| "step": 24920 | |
| }, | |
| { | |
| "epoch": 121.7327638804149, | |
| "grad_norm": 3.1407511234283447, | |
| "learning_rate": 3.711475409836066e-05, | |
| "loss": 0.1833, | |
| "step": 24940 | |
| }, | |
| { | |
| "epoch": 121.83038438071995, | |
| "grad_norm": 3.3532192707061768, | |
| "learning_rate": 3.698360655737705e-05, | |
| "loss": 0.1858, | |
| "step": 24960 | |
| }, | |
| { | |
| "epoch": 121.92800488102502, | |
| "grad_norm": 3.1784684658050537, | |
| "learning_rate": 3.685245901639344e-05, | |
| "loss": 0.2, | |
| "step": 24980 | |
| }, | |
| { | |
| "epoch": 122.02562538133007, | |
| "grad_norm": 3.5837671756744385, | |
| "learning_rate": 3.672131147540984e-05, | |
| "loss": 0.2073, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 122.12324588163514, | |
| "grad_norm": 3.2040927410125732, | |
| "learning_rate": 3.659016393442623e-05, | |
| "loss": 0.1712, | |
| "step": 25020 | |
| }, | |
| { | |
| "epoch": 122.22086638194021, | |
| "grad_norm": 3.3119306564331055, | |
| "learning_rate": 3.6459016393442625e-05, | |
| "loss": 0.1879, | |
| "step": 25040 | |
| }, | |
| { | |
| "epoch": 122.31848688224527, | |
| "grad_norm": 3.2154297828674316, | |
| "learning_rate": 3.632786885245902e-05, | |
| "loss": 0.1976, | |
| "step": 25060 | |
| }, | |
| { | |
| "epoch": 122.41610738255034, | |
| "grad_norm": 2.9070613384246826, | |
| "learning_rate": 3.619672131147541e-05, | |
| "loss": 0.2077, | |
| "step": 25080 | |
| }, | |
| { | |
| "epoch": 122.5137278828554, | |
| "grad_norm": 3.866682529449463, | |
| "learning_rate": 3.6065573770491806e-05, | |
| "loss": 0.1673, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 122.61134838316046, | |
| "grad_norm": 4.236519813537598, | |
| "learning_rate": 3.59344262295082e-05, | |
| "loss": 0.2096, | |
| "step": 25120 | |
| }, | |
| { | |
| "epoch": 122.70896888346553, | |
| "grad_norm": 3.386503219604492, | |
| "learning_rate": 3.580327868852459e-05, | |
| "loss": 0.1962, | |
| "step": 25140 | |
| }, | |
| { | |
| "epoch": 122.80658938377059, | |
| "grad_norm": 2.7001540660858154, | |
| "learning_rate": 3.567213114754099e-05, | |
| "loss": 0.1731, | |
| "step": 25160 | |
| }, | |
| { | |
| "epoch": 122.90420988407566, | |
| "grad_norm": 3.281818389892578, | |
| "learning_rate": 3.554098360655738e-05, | |
| "loss": 0.2225, | |
| "step": 25180 | |
| }, | |
| { | |
| "epoch": 123.00183038438072, | |
| "grad_norm": 4.1742095947265625, | |
| "learning_rate": 3.5409836065573773e-05, | |
| "loss": 0.1947, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 123.09945088468578, | |
| "grad_norm": 3.328521490097046, | |
| "learning_rate": 3.527868852459016e-05, | |
| "loss": 0.1607, | |
| "step": 25220 | |
| }, | |
| { | |
| "epoch": 123.19707138499085, | |
| "grad_norm": 4.815507888793945, | |
| "learning_rate": 3.514754098360656e-05, | |
| "loss": 0.1977, | |
| "step": 25240 | |
| }, | |
| { | |
| "epoch": 123.29469188529592, | |
| "grad_norm": 3.736438512802124, | |
| "learning_rate": 3.5016393442622955e-05, | |
| "loss": 0.2105, | |
| "step": 25260 | |
| }, | |
| { | |
| "epoch": 123.39231238560097, | |
| "grad_norm": 3.4552454948425293, | |
| "learning_rate": 3.4885245901639344e-05, | |
| "loss": 0.2002, | |
| "step": 25280 | |
| }, | |
| { | |
| "epoch": 123.48993288590604, | |
| "grad_norm": 2.3649439811706543, | |
| "learning_rate": 3.475409836065574e-05, | |
| "loss": 0.1823, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 123.5875533862111, | |
| "grad_norm": 3.0043177604675293, | |
| "learning_rate": 3.462295081967214e-05, | |
| "loss": 0.1922, | |
| "step": 25320 | |
| }, | |
| { | |
| "epoch": 123.68517388651617, | |
| "grad_norm": 4.2065253257751465, | |
| "learning_rate": 3.4491803278688526e-05, | |
| "loss": 0.1943, | |
| "step": 25340 | |
| }, | |
| { | |
| "epoch": 123.78279438682124, | |
| "grad_norm": 3.961331605911255, | |
| "learning_rate": 3.436065573770492e-05, | |
| "loss": 0.1859, | |
| "step": 25360 | |
| }, | |
| { | |
| "epoch": 123.88041488712629, | |
| "grad_norm": 4.434045791625977, | |
| "learning_rate": 3.422950819672131e-05, | |
| "loss": 0.1985, | |
| "step": 25380 | |
| }, | |
| { | |
| "epoch": 123.97803538743136, | |
| "grad_norm": 2.87196946144104, | |
| "learning_rate": 3.409836065573771e-05, | |
| "loss": 0.2046, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 124.07565588773643, | |
| "grad_norm": 2.9702465534210205, | |
| "learning_rate": 3.39672131147541e-05, | |
| "loss": 0.1665, | |
| "step": 25420 | |
| }, | |
| { | |
| "epoch": 124.17327638804149, | |
| "grad_norm": 4.106092929840088, | |
| "learning_rate": 3.383606557377049e-05, | |
| "loss": 0.1966, | |
| "step": 25440 | |
| }, | |
| { | |
| "epoch": 124.27089688834656, | |
| "grad_norm": 3.1091065406799316, | |
| "learning_rate": 3.370491803278689e-05, | |
| "loss": 0.2078, | |
| "step": 25460 | |
| }, | |
| { | |
| "epoch": 124.36851738865161, | |
| "grad_norm": 3.4036500453948975, | |
| "learning_rate": 3.357377049180328e-05, | |
| "loss": 0.1814, | |
| "step": 25480 | |
| }, | |
| { | |
| "epoch": 124.46613788895668, | |
| "grad_norm": 3.7814347743988037, | |
| "learning_rate": 3.3442622950819675e-05, | |
| "loss": 0.195, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 124.56375838926175, | |
| "grad_norm": 4.475528240203857, | |
| "learning_rate": 3.331147540983607e-05, | |
| "loss": 0.1984, | |
| "step": 25520 | |
| }, | |
| { | |
| "epoch": 124.6613788895668, | |
| "grad_norm": 3.35787296295166, | |
| "learning_rate": 3.318032786885246e-05, | |
| "loss": 0.1809, | |
| "step": 25540 | |
| }, | |
| { | |
| "epoch": 124.75899938987187, | |
| "grad_norm": 3.594639301300049, | |
| "learning_rate": 3.3049180327868857e-05, | |
| "loss": 0.1844, | |
| "step": 25560 | |
| }, | |
| { | |
| "epoch": 124.85661989017694, | |
| "grad_norm": 3.5354974269866943, | |
| "learning_rate": 3.291803278688525e-05, | |
| "loss": 0.1953, | |
| "step": 25580 | |
| }, | |
| { | |
| "epoch": 124.954240390482, | |
| "grad_norm": 2.810798168182373, | |
| "learning_rate": 3.2786885245901635e-05, | |
| "loss": 0.1911, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 125.05186089078707, | |
| "grad_norm": 3.3332059383392334, | |
| "learning_rate": 3.265573770491803e-05, | |
| "loss": 0.1593, | |
| "step": 25620 | |
| }, | |
| { | |
| "epoch": 125.14948139109212, | |
| "grad_norm": 2.8333628177642822, | |
| "learning_rate": 3.252459016393443e-05, | |
| "loss": 0.1709, | |
| "step": 25640 | |
| }, | |
| { | |
| "epoch": 125.2471018913972, | |
| "grad_norm": 2.9144675731658936, | |
| "learning_rate": 3.2393442622950824e-05, | |
| "loss": 0.2073, | |
| "step": 25660 | |
| }, | |
| { | |
| "epoch": 125.34472239170226, | |
| "grad_norm": 3.185001850128174, | |
| "learning_rate": 3.226229508196721e-05, | |
| "loss": 0.1665, | |
| "step": 25680 | |
| }, | |
| { | |
| "epoch": 125.44234289200732, | |
| "grad_norm": 2.8228659629821777, | |
| "learning_rate": 3.213114754098361e-05, | |
| "loss": 0.163, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 125.53996339231239, | |
| "grad_norm": 2.8857967853546143, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.2057, | |
| "step": 25720 | |
| }, | |
| { | |
| "epoch": 125.63758389261746, | |
| "grad_norm": 2.9855597019195557, | |
| "learning_rate": 3.1868852459016395e-05, | |
| "loss": 0.1872, | |
| "step": 25740 | |
| }, | |
| { | |
| "epoch": 125.73520439292251, | |
| "grad_norm": 3.5702884197235107, | |
| "learning_rate": 3.173770491803279e-05, | |
| "loss": 0.1992, | |
| "step": 25760 | |
| }, | |
| { | |
| "epoch": 125.83282489322758, | |
| "grad_norm": 5.302943706512451, | |
| "learning_rate": 3.160655737704919e-05, | |
| "loss": 0.1995, | |
| "step": 25780 | |
| }, | |
| { | |
| "epoch": 125.93044539353264, | |
| "grad_norm": 3.9966931343078613, | |
| "learning_rate": 3.1475409836065576e-05, | |
| "loss": 0.2046, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 126.0280658938377, | |
| "grad_norm": 2.6985690593719482, | |
| "learning_rate": 3.1344262295081966e-05, | |
| "loss": 0.2084, | |
| "step": 25820 | |
| }, | |
| { | |
| "epoch": 126.12568639414278, | |
| "grad_norm": 3.317439079284668, | |
| "learning_rate": 3.121311475409836e-05, | |
| "loss": 0.1971, | |
| "step": 25840 | |
| }, | |
| { | |
| "epoch": 126.22330689444783, | |
| "grad_norm": 3.718867301940918, | |
| "learning_rate": 3.108196721311475e-05, | |
| "loss": 0.1759, | |
| "step": 25860 | |
| }, | |
| { | |
| "epoch": 126.3209273947529, | |
| "grad_norm": 3.7418856620788574, | |
| "learning_rate": 3.095081967213115e-05, | |
| "loss": 0.2067, | |
| "step": 25880 | |
| }, | |
| { | |
| "epoch": 126.41854789505797, | |
| "grad_norm": 3.9287869930267334, | |
| "learning_rate": 3.0819672131147544e-05, | |
| "loss": 0.2032, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 126.51616839536302, | |
| "grad_norm": 1.9036474227905273, | |
| "learning_rate": 3.068852459016393e-05, | |
| "loss": 0.1905, | |
| "step": 25920 | |
| }, | |
| { | |
| "epoch": 126.6137888956681, | |
| "grad_norm": 2.602092981338501, | |
| "learning_rate": 3.055737704918033e-05, | |
| "loss": 0.1851, | |
| "step": 25940 | |
| }, | |
| { | |
| "epoch": 126.71140939597315, | |
| "grad_norm": 3.2280685901641846, | |
| "learning_rate": 3.0426229508196725e-05, | |
| "loss": 0.1938, | |
| "step": 25960 | |
| }, | |
| { | |
| "epoch": 126.80902989627822, | |
| "grad_norm": 4.606971263885498, | |
| "learning_rate": 3.0295081967213118e-05, | |
| "loss": 0.1644, | |
| "step": 25980 | |
| }, | |
| { | |
| "epoch": 126.90665039658329, | |
| "grad_norm": 3.755833864212036, | |
| "learning_rate": 3.016393442622951e-05, | |
| "loss": 0.1836, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 127.00427089688834, | |
| "grad_norm": 3.572577953338623, | |
| "learning_rate": 3.00327868852459e-05, | |
| "loss": 0.1733, | |
| "step": 26020 | |
| }, | |
| { | |
| "epoch": 127.10189139719341, | |
| "grad_norm": 3.1435470581054688, | |
| "learning_rate": 2.9901639344262293e-05, | |
| "loss": 0.1815, | |
| "step": 26040 | |
| }, | |
| { | |
| "epoch": 127.19951189749847, | |
| "grad_norm": 3.3165206909179688, | |
| "learning_rate": 2.977049180327869e-05, | |
| "loss": 0.183, | |
| "step": 26060 | |
| }, | |
| { | |
| "epoch": 127.29713239780354, | |
| "grad_norm": 3.269935131072998, | |
| "learning_rate": 2.963934426229508e-05, | |
| "loss": 0.1854, | |
| "step": 26080 | |
| }, | |
| { | |
| "epoch": 127.3947528981086, | |
| "grad_norm": 3.6275577545166016, | |
| "learning_rate": 2.9508196721311478e-05, | |
| "loss": 0.1798, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 127.49237339841366, | |
| "grad_norm": 3.3832483291625977, | |
| "learning_rate": 2.937704918032787e-05, | |
| "loss": 0.1722, | |
| "step": 26120 | |
| }, | |
| { | |
| "epoch": 127.58999389871873, | |
| "grad_norm": 3.828364610671997, | |
| "learning_rate": 2.9245901639344263e-05, | |
| "loss": 0.1853, | |
| "step": 26140 | |
| }, | |
| { | |
| "epoch": 127.6876143990238, | |
| "grad_norm": 3.7207860946655273, | |
| "learning_rate": 2.911475409836066e-05, | |
| "loss": 0.1948, | |
| "step": 26160 | |
| }, | |
| { | |
| "epoch": 127.78523489932886, | |
| "grad_norm": 3.281031847000122, | |
| "learning_rate": 2.8983606557377052e-05, | |
| "loss": 0.2063, | |
| "step": 26180 | |
| }, | |
| { | |
| "epoch": 127.88285539963393, | |
| "grad_norm": 3.1116421222686768, | |
| "learning_rate": 2.8852459016393445e-05, | |
| "loss": 0.191, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 127.98047589993898, | |
| "grad_norm": 4.254022121429443, | |
| "learning_rate": 2.872131147540984e-05, | |
| "loss": 0.202, | |
| "step": 26220 | |
| }, | |
| { | |
| "epoch": 128.07809640024405, | |
| "grad_norm": 3.1279819011688232, | |
| "learning_rate": 2.8590163934426227e-05, | |
| "loss": 0.1934, | |
| "step": 26240 | |
| }, | |
| { | |
| "epoch": 128.1757169005491, | |
| "grad_norm": 3.3977596759796143, | |
| "learning_rate": 2.8459016393442623e-05, | |
| "loss": 0.1807, | |
| "step": 26260 | |
| }, | |
| { | |
| "epoch": 128.2733374008542, | |
| "grad_norm": 5.261218070983887, | |
| "learning_rate": 2.8327868852459016e-05, | |
| "loss": 0.17, | |
| "step": 26280 | |
| }, | |
| { | |
| "epoch": 128.37095790115924, | |
| "grad_norm": 4.153654098510742, | |
| "learning_rate": 2.819672131147541e-05, | |
| "loss": 0.1891, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 128.4685784014643, | |
| "grad_norm": 3.449397563934326, | |
| "learning_rate": 2.8065573770491805e-05, | |
| "loss": 0.1846, | |
| "step": 26320 | |
| }, | |
| { | |
| "epoch": 128.56619890176938, | |
| "grad_norm": 3.43442702293396, | |
| "learning_rate": 2.7934426229508198e-05, | |
| "loss": 0.1807, | |
| "step": 26340 | |
| }, | |
| { | |
| "epoch": 128.66381940207444, | |
| "grad_norm": 2.9243948459625244, | |
| "learning_rate": 2.7803278688524594e-05, | |
| "loss": 0.1704, | |
| "step": 26360 | |
| }, | |
| { | |
| "epoch": 128.7614399023795, | |
| "grad_norm": 3.9830613136291504, | |
| "learning_rate": 2.7672131147540987e-05, | |
| "loss": 0.1909, | |
| "step": 26380 | |
| }, | |
| { | |
| "epoch": 128.85906040268458, | |
| "grad_norm": 3.0765368938446045, | |
| "learning_rate": 2.754098360655738e-05, | |
| "loss": 0.1954, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 128.95668090298963, | |
| "grad_norm": 2.540853977203369, | |
| "learning_rate": 2.7409836065573775e-05, | |
| "loss": 0.2116, | |
| "step": 26420 | |
| }, | |
| { | |
| "epoch": 129.0543014032947, | |
| "grad_norm": 3.55985689163208, | |
| "learning_rate": 2.7278688524590168e-05, | |
| "loss": 0.193, | |
| "step": 26440 | |
| }, | |
| { | |
| "epoch": 129.15192190359974, | |
| "grad_norm": 3.974700450897217, | |
| "learning_rate": 2.7147540983606558e-05, | |
| "loss": 0.1673, | |
| "step": 26460 | |
| }, | |
| { | |
| "epoch": 129.24954240390483, | |
| "grad_norm": 4.614022731781006, | |
| "learning_rate": 2.701639344262295e-05, | |
| "loss": 0.1968, | |
| "step": 26480 | |
| }, | |
| { | |
| "epoch": 129.34716290420988, | |
| "grad_norm": 3.9277889728546143, | |
| "learning_rate": 2.6885245901639343e-05, | |
| "loss": 0.1643, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 129.44478340451494, | |
| "grad_norm": 3.4351913928985596, | |
| "learning_rate": 2.675409836065574e-05, | |
| "loss": 0.1903, | |
| "step": 26520 | |
| }, | |
| { | |
| "epoch": 129.54240390482002, | |
| "grad_norm": 3.3347392082214355, | |
| "learning_rate": 2.6622950819672132e-05, | |
| "loss": 0.1789, | |
| "step": 26540 | |
| }, | |
| { | |
| "epoch": 129.64002440512508, | |
| "grad_norm": 3.970414161682129, | |
| "learning_rate": 2.6491803278688525e-05, | |
| "loss": 0.1994, | |
| "step": 26560 | |
| }, | |
| { | |
| "epoch": 129.73764490543013, | |
| "grad_norm": 3.648883819580078, | |
| "learning_rate": 2.636065573770492e-05, | |
| "loss": 0.1877, | |
| "step": 26580 | |
| }, | |
| { | |
| "epoch": 129.8352654057352, | |
| "grad_norm": 3.3394792079925537, | |
| "learning_rate": 2.6229508196721314e-05, | |
| "loss": 0.1838, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 129.93288590604027, | |
| "grad_norm": 2.924798011779785, | |
| "learning_rate": 2.6098360655737706e-05, | |
| "loss": 0.1913, | |
| "step": 26620 | |
| }, | |
| { | |
| "epoch": 130.03050640634532, | |
| "grad_norm": 3.448457717895508, | |
| "learning_rate": 2.5967213114754103e-05, | |
| "loss": 0.1993, | |
| "step": 26640 | |
| }, | |
| { | |
| "epoch": 130.1281269066504, | |
| "grad_norm": 2.458868980407715, | |
| "learning_rate": 2.5836065573770492e-05, | |
| "loss": 0.1996, | |
| "step": 26660 | |
| }, | |
| { | |
| "epoch": 130.22574740695546, | |
| "grad_norm": 4.39287805557251, | |
| "learning_rate": 2.5704918032786885e-05, | |
| "loss": 0.1894, | |
| "step": 26680 | |
| }, | |
| { | |
| "epoch": 130.32336790726052, | |
| "grad_norm": 3.347745180130005, | |
| "learning_rate": 2.5573770491803277e-05, | |
| "loss": 0.1642, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 130.4209884075656, | |
| "grad_norm": 3.0466248989105225, | |
| "learning_rate": 2.5442622950819674e-05, | |
| "loss": 0.1976, | |
| "step": 26720 | |
| }, | |
| { | |
| "epoch": 130.51860890787066, | |
| "grad_norm": 2.580834150314331, | |
| "learning_rate": 2.5311475409836066e-05, | |
| "loss": 0.1764, | |
| "step": 26740 | |
| }, | |
| { | |
| "epoch": 130.6162294081757, | |
| "grad_norm": 3.6775128841400146, | |
| "learning_rate": 2.518032786885246e-05, | |
| "loss": 0.1725, | |
| "step": 26760 | |
| }, | |
| { | |
| "epoch": 130.71384990848077, | |
| "grad_norm": 3.829058885574341, | |
| "learning_rate": 2.5049180327868855e-05, | |
| "loss": 0.1776, | |
| "step": 26780 | |
| }, | |
| { | |
| "epoch": 130.81147040878585, | |
| "grad_norm": 4.047943592071533, | |
| "learning_rate": 2.4918032786885248e-05, | |
| "loss": 0.1943, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 130.9090909090909, | |
| "grad_norm": 3.2828705310821533, | |
| "learning_rate": 2.478688524590164e-05, | |
| "loss": 0.1864, | |
| "step": 26820 | |
| }, | |
| { | |
| "epoch": 131.00671140939596, | |
| "grad_norm": 4.2563958168029785, | |
| "learning_rate": 2.4655737704918033e-05, | |
| "loss": 0.1838, | |
| "step": 26840 | |
| }, | |
| { | |
| "epoch": 131.10433190970105, | |
| "grad_norm": 3.279503583908081, | |
| "learning_rate": 2.4524590163934426e-05, | |
| "loss": 0.2027, | |
| "step": 26860 | |
| }, | |
| { | |
| "epoch": 131.2019524100061, | |
| "grad_norm": 3.8052897453308105, | |
| "learning_rate": 2.4393442622950822e-05, | |
| "loss": 0.1916, | |
| "step": 26880 | |
| }, | |
| { | |
| "epoch": 131.29957291031116, | |
| "grad_norm": 3.12294602394104, | |
| "learning_rate": 2.4262295081967215e-05, | |
| "loss": 0.1542, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 131.39719341061624, | |
| "grad_norm": 4.636548042297363, | |
| "learning_rate": 2.4131147540983608e-05, | |
| "loss": 0.1677, | |
| "step": 26920 | |
| }, | |
| { | |
| "epoch": 131.4948139109213, | |
| "grad_norm": 2.8608415126800537, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.194, | |
| "step": 26940 | |
| }, | |
| { | |
| "epoch": 131.59243441122635, | |
| "grad_norm": 3.7946908473968506, | |
| "learning_rate": 2.3868852459016393e-05, | |
| "loss": 0.1764, | |
| "step": 26960 | |
| }, | |
| { | |
| "epoch": 131.69005491153143, | |
| "grad_norm": 3.1568832397460938, | |
| "learning_rate": 2.373770491803279e-05, | |
| "loss": 0.1865, | |
| "step": 26980 | |
| }, | |
| { | |
| "epoch": 131.7876754118365, | |
| "grad_norm": 4.103198528289795, | |
| "learning_rate": 2.3606557377049182e-05, | |
| "loss": 0.1753, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 131.88529591214154, | |
| "grad_norm": 3.655327796936035, | |
| "learning_rate": 2.3475409836065575e-05, | |
| "loss": 0.1965, | |
| "step": 27020 | |
| }, | |
| { | |
| "epoch": 131.98291641244663, | |
| "grad_norm": 3.908200263977051, | |
| "learning_rate": 2.3344262295081968e-05, | |
| "loss": 0.1791, | |
| "step": 27040 | |
| }, | |
| { | |
| "epoch": 132.08053691275168, | |
| "grad_norm": 3.2557129859924316, | |
| "learning_rate": 2.321311475409836e-05, | |
| "loss": 0.1715, | |
| "step": 27060 | |
| }, | |
| { | |
| "epoch": 132.17815741305674, | |
| "grad_norm": 3.102268695831299, | |
| "learning_rate": 2.3081967213114757e-05, | |
| "loss": 0.191, | |
| "step": 27080 | |
| }, | |
| { | |
| "epoch": 132.2757779133618, | |
| "grad_norm": 2.7028493881225586, | |
| "learning_rate": 2.295081967213115e-05, | |
| "loss": 0.1802, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 132.37339841366688, | |
| "grad_norm": 2.5240046977996826, | |
| "learning_rate": 2.2819672131147542e-05, | |
| "loss": 0.1618, | |
| "step": 27120 | |
| }, | |
| { | |
| "epoch": 132.47101891397193, | |
| "grad_norm": 3.0410265922546387, | |
| "learning_rate": 2.2688524590163935e-05, | |
| "loss": 0.1833, | |
| "step": 27140 | |
| }, | |
| { | |
| "epoch": 132.568639414277, | |
| "grad_norm": 3.677824020385742, | |
| "learning_rate": 2.2557377049180328e-05, | |
| "loss": 0.1804, | |
| "step": 27160 | |
| }, | |
| { | |
| "epoch": 132.66625991458207, | |
| "grad_norm": 2.826828718185425, | |
| "learning_rate": 2.2426229508196724e-05, | |
| "loss": 0.1915, | |
| "step": 27180 | |
| }, | |
| { | |
| "epoch": 132.76388041488713, | |
| "grad_norm": 4.393260955810547, | |
| "learning_rate": 2.2295081967213117e-05, | |
| "loss": 0.1952, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 132.86150091519218, | |
| "grad_norm": 3.1146349906921387, | |
| "learning_rate": 2.216393442622951e-05, | |
| "loss": 0.1973, | |
| "step": 27220 | |
| }, | |
| { | |
| "epoch": 132.95912141549726, | |
| "grad_norm": 3.343693494796753, | |
| "learning_rate": 2.2032786885245905e-05, | |
| "loss": 0.1822, | |
| "step": 27240 | |
| }, | |
| { | |
| "epoch": 133.05674191580232, | |
| "grad_norm": 3.0667552947998047, | |
| "learning_rate": 2.1901639344262295e-05, | |
| "loss": 0.1807, | |
| "step": 27260 | |
| }, | |
| { | |
| "epoch": 133.15436241610738, | |
| "grad_norm": 3.497859001159668, | |
| "learning_rate": 2.1770491803278688e-05, | |
| "loss": 0.1525, | |
| "step": 27280 | |
| }, | |
| { | |
| "epoch": 133.25198291641246, | |
| "grad_norm": 3.295478343963623, | |
| "learning_rate": 2.1639344262295084e-05, | |
| "loss": 0.1971, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 133.3496034167175, | |
| "grad_norm": 3.3247010707855225, | |
| "learning_rate": 2.1508196721311476e-05, | |
| "loss": 0.1963, | |
| "step": 27320 | |
| }, | |
| { | |
| "epoch": 133.44722391702257, | |
| "grad_norm": 4.269167900085449, | |
| "learning_rate": 2.1377049180327873e-05, | |
| "loss": 0.1967, | |
| "step": 27340 | |
| }, | |
| { | |
| "epoch": 133.54484441732765, | |
| "grad_norm": 3.7575721740722656, | |
| "learning_rate": 2.1245901639344262e-05, | |
| "loss": 0.1788, | |
| "step": 27360 | |
| }, | |
| { | |
| "epoch": 133.6424649176327, | |
| "grad_norm": 4.189979553222656, | |
| "learning_rate": 2.1114754098360655e-05, | |
| "loss": 0.1922, | |
| "step": 27380 | |
| }, | |
| { | |
| "epoch": 133.74008541793776, | |
| "grad_norm": 3.48610782623291, | |
| "learning_rate": 2.098360655737705e-05, | |
| "loss": 0.1749, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 133.83770591824282, | |
| "grad_norm": 3.320037364959717, | |
| "learning_rate": 2.0852459016393444e-05, | |
| "loss": 0.1886, | |
| "step": 27420 | |
| }, | |
| { | |
| "epoch": 133.9353264185479, | |
| "grad_norm": 3.347099781036377, | |
| "learning_rate": 2.0721311475409836e-05, | |
| "loss": 0.1717, | |
| "step": 27440 | |
| }, | |
| { | |
| "epoch": 134.03294691885296, | |
| "grad_norm": 2.9406418800354004, | |
| "learning_rate": 2.059016393442623e-05, | |
| "loss": 0.1702, | |
| "step": 27460 | |
| }, | |
| { | |
| "epoch": 134.130567419158, | |
| "grad_norm": 2.3794620037078857, | |
| "learning_rate": 2.0459016393442622e-05, | |
| "loss": 0.1872, | |
| "step": 27480 | |
| }, | |
| { | |
| "epoch": 134.2281879194631, | |
| "grad_norm": 3.1150660514831543, | |
| "learning_rate": 2.0327868852459018e-05, | |
| "loss": 0.2023, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 134.32580841976815, | |
| "grad_norm": 3.679694414138794, | |
| "learning_rate": 2.019672131147541e-05, | |
| "loss": 0.1858, | |
| "step": 27520 | |
| }, | |
| { | |
| "epoch": 134.4234289200732, | |
| "grad_norm": 2.665882110595703, | |
| "learning_rate": 2.0065573770491804e-05, | |
| "loss": 0.1782, | |
| "step": 27540 | |
| }, | |
| { | |
| "epoch": 134.5210494203783, | |
| "grad_norm": 3.0053212642669678, | |
| "learning_rate": 1.99344262295082e-05, | |
| "loss": 0.1773, | |
| "step": 27560 | |
| }, | |
| { | |
| "epoch": 134.61866992068335, | |
| "grad_norm": 2.689307451248169, | |
| "learning_rate": 1.980327868852459e-05, | |
| "loss": 0.1588, | |
| "step": 27580 | |
| }, | |
| { | |
| "epoch": 134.7162904209884, | |
| "grad_norm": 4.318088054656982, | |
| "learning_rate": 1.9672131147540985e-05, | |
| "loss": 0.1436, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 134.81391092129348, | |
| "grad_norm": 3.3378102779388428, | |
| "learning_rate": 1.9540983606557378e-05, | |
| "loss": 0.1919, | |
| "step": 27620 | |
| }, | |
| { | |
| "epoch": 134.91153142159854, | |
| "grad_norm": 3.2355871200561523, | |
| "learning_rate": 1.940983606557377e-05, | |
| "loss": 0.1828, | |
| "step": 27640 | |
| }, | |
| { | |
| "epoch": 135.0091519219036, | |
| "grad_norm": 3.1335229873657227, | |
| "learning_rate": 1.9278688524590167e-05, | |
| "loss": 0.1982, | |
| "step": 27660 | |
| }, | |
| { | |
| "epoch": 135.10677242220865, | |
| "grad_norm": 4.066319465637207, | |
| "learning_rate": 1.9147540983606556e-05, | |
| "loss": 0.1526, | |
| "step": 27680 | |
| }, | |
| { | |
| "epoch": 135.20439292251373, | |
| "grad_norm": 3.631089925765991, | |
| "learning_rate": 1.9016393442622952e-05, | |
| "loss": 0.1776, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 135.3020134228188, | |
| "grad_norm": 3.5840327739715576, | |
| "learning_rate": 1.8885245901639345e-05, | |
| "loss": 0.1922, | |
| "step": 27720 | |
| }, | |
| { | |
| "epoch": 135.39963392312384, | |
| "grad_norm": 2.926558256149292, | |
| "learning_rate": 1.8754098360655738e-05, | |
| "loss": 0.1847, | |
| "step": 27740 | |
| }, | |
| { | |
| "epoch": 135.49725442342893, | |
| "grad_norm": 4.487957000732422, | |
| "learning_rate": 1.8622950819672134e-05, | |
| "loss": 0.1896, | |
| "step": 27760 | |
| }, | |
| { | |
| "epoch": 135.59487492373398, | |
| "grad_norm": 3.209500789642334, | |
| "learning_rate": 1.8491803278688523e-05, | |
| "loss": 0.1827, | |
| "step": 27780 | |
| }, | |
| { | |
| "epoch": 135.69249542403904, | |
| "grad_norm": 2.8735058307647705, | |
| "learning_rate": 1.836065573770492e-05, | |
| "loss": 0.1671, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 135.79011592434412, | |
| "grad_norm": 3.221266508102417, | |
| "learning_rate": 1.8229508196721312e-05, | |
| "loss": 0.1861, | |
| "step": 27820 | |
| }, | |
| { | |
| "epoch": 135.88773642464918, | |
| "grad_norm": 3.1269659996032715, | |
| "learning_rate": 1.8098360655737705e-05, | |
| "loss": 0.1857, | |
| "step": 27840 | |
| }, | |
| { | |
| "epoch": 135.98535692495423, | |
| "grad_norm": 3.3478143215179443, | |
| "learning_rate": 1.79672131147541e-05, | |
| "loss": 0.1853, | |
| "step": 27860 | |
| }, | |
| { | |
| "epoch": 136.08297742525932, | |
| "grad_norm": 2.8102643489837646, | |
| "learning_rate": 1.7836065573770494e-05, | |
| "loss": 0.1691, | |
| "step": 27880 | |
| }, | |
| { | |
| "epoch": 136.18059792556437, | |
| "grad_norm": 2.9172909259796143, | |
| "learning_rate": 1.7704918032786887e-05, | |
| "loss": 0.1805, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 136.27821842586943, | |
| "grad_norm": 3.6803557872772217, | |
| "learning_rate": 1.757377049180328e-05, | |
| "loss": 0.1808, | |
| "step": 27920 | |
| }, | |
| { | |
| "epoch": 136.3758389261745, | |
| "grad_norm": 3.6122121810913086, | |
| "learning_rate": 1.7442622950819672e-05, | |
| "loss": 0.1825, | |
| "step": 27940 | |
| }, | |
| { | |
| "epoch": 136.47345942647956, | |
| "grad_norm": 2.836901903152466, | |
| "learning_rate": 1.731147540983607e-05, | |
| "loss": 0.1762, | |
| "step": 27960 | |
| }, | |
| { | |
| "epoch": 136.57107992678462, | |
| "grad_norm": 4.931893348693848, | |
| "learning_rate": 1.718032786885246e-05, | |
| "loss": 0.2097, | |
| "step": 27980 | |
| }, | |
| { | |
| "epoch": 136.66870042708968, | |
| "grad_norm": 3.634223461151123, | |
| "learning_rate": 1.7049180327868854e-05, | |
| "loss": 0.1764, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 136.76632092739476, | |
| "grad_norm": 4.417370319366455, | |
| "learning_rate": 1.6918032786885247e-05, | |
| "loss": 0.1695, | |
| "step": 28020 | |
| }, | |
| { | |
| "epoch": 136.8639414276998, | |
| "grad_norm": 3.6770431995391846, | |
| "learning_rate": 1.678688524590164e-05, | |
| "loss": 0.1762, | |
| "step": 28040 | |
| }, | |
| { | |
| "epoch": 136.96156192800487, | |
| "grad_norm": 3.134272813796997, | |
| "learning_rate": 1.6655737704918036e-05, | |
| "loss": 0.1603, | |
| "step": 28060 | |
| }, | |
| { | |
| "epoch": 137.05918242830995, | |
| "grad_norm": 3.993882179260254, | |
| "learning_rate": 1.6524590163934428e-05, | |
| "loss": 0.1927, | |
| "step": 28080 | |
| }, | |
| { | |
| "epoch": 137.156802928615, | |
| "grad_norm": 3.1614527702331543, | |
| "learning_rate": 1.6393442622950818e-05, | |
| "loss": 0.1739, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 137.25442342892006, | |
| "grad_norm": 2.9293642044067383, | |
| "learning_rate": 1.6262295081967214e-05, | |
| "loss": 0.1916, | |
| "step": 28120 | |
| }, | |
| { | |
| "epoch": 137.35204392922515, | |
| "grad_norm": 3.292917251586914, | |
| "learning_rate": 1.6131147540983607e-05, | |
| "loss": 0.1638, | |
| "step": 28140 | |
| }, | |
| { | |
| "epoch": 137.4496644295302, | |
| "grad_norm": 3.048471212387085, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.1877, | |
| "step": 28160 | |
| }, | |
| { | |
| "epoch": 137.54728492983526, | |
| "grad_norm": 3.398252487182617, | |
| "learning_rate": 1.5868852459016395e-05, | |
| "loss": 0.1685, | |
| "step": 28180 | |
| }, | |
| { | |
| "epoch": 137.64490543014034, | |
| "grad_norm": 3.906764268875122, | |
| "learning_rate": 1.5737704918032788e-05, | |
| "loss": 0.1772, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 137.7425259304454, | |
| "grad_norm": 3.1852598190307617, | |
| "learning_rate": 1.560655737704918e-05, | |
| "loss": 0.168, | |
| "step": 28220 | |
| }, | |
| { | |
| "epoch": 137.84014643075045, | |
| "grad_norm": 2.9285385608673096, | |
| "learning_rate": 1.5475409836065574e-05, | |
| "loss": 0.1733, | |
| "step": 28240 | |
| }, | |
| { | |
| "epoch": 137.93776693105553, | |
| "grad_norm": 3.9846913814544678, | |
| "learning_rate": 1.5344262295081966e-05, | |
| "loss": 0.1906, | |
| "step": 28260 | |
| }, | |
| { | |
| "epoch": 138.0353874313606, | |
| "grad_norm": 3.3251185417175293, | |
| "learning_rate": 1.5213114754098363e-05, | |
| "loss": 0.1872, | |
| "step": 28280 | |
| }, | |
| { | |
| "epoch": 138.13300793166565, | |
| "grad_norm": 3.7904114723205566, | |
| "learning_rate": 1.5081967213114755e-05, | |
| "loss": 0.1811, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 138.2306284319707, | |
| "grad_norm": 3.5363810062408447, | |
| "learning_rate": 1.4950819672131146e-05, | |
| "loss": 0.1803, | |
| "step": 28320 | |
| }, | |
| { | |
| "epoch": 138.32824893227578, | |
| "grad_norm": 3.430577516555786, | |
| "learning_rate": 1.481967213114754e-05, | |
| "loss": 0.1619, | |
| "step": 28340 | |
| }, | |
| { | |
| "epoch": 138.42586943258084, | |
| "grad_norm": 3.1190154552459717, | |
| "learning_rate": 1.4688524590163935e-05, | |
| "loss": 0.1909, | |
| "step": 28360 | |
| }, | |
| { | |
| "epoch": 138.5234899328859, | |
| "grad_norm": 2.656212091445923, | |
| "learning_rate": 1.455737704918033e-05, | |
| "loss": 0.1682, | |
| "step": 28380 | |
| }, | |
| { | |
| "epoch": 138.62111043319098, | |
| "grad_norm": 3.1637041568756104, | |
| "learning_rate": 1.4426229508196722e-05, | |
| "loss": 0.1729, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 138.71873093349603, | |
| "grad_norm": 3.313136100769043, | |
| "learning_rate": 1.4295081967213114e-05, | |
| "loss": 0.1853, | |
| "step": 28420 | |
| }, | |
| { | |
| "epoch": 138.8163514338011, | |
| "grad_norm": 3.63885498046875, | |
| "learning_rate": 1.4163934426229508e-05, | |
| "loss": 0.1783, | |
| "step": 28440 | |
| }, | |
| { | |
| "epoch": 138.91397193410617, | |
| "grad_norm": 4.251205921173096, | |
| "learning_rate": 1.4032786885245902e-05, | |
| "loss": 0.1703, | |
| "step": 28460 | |
| }, | |
| { | |
| "epoch": 139.01159243441123, | |
| "grad_norm": 3.76887583732605, | |
| "learning_rate": 1.3901639344262297e-05, | |
| "loss": 0.1871, | |
| "step": 28480 | |
| }, | |
| { | |
| "epoch": 139.10921293471628, | |
| "grad_norm": 3.2986671924591064, | |
| "learning_rate": 1.377049180327869e-05, | |
| "loss": 0.1728, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 139.20683343502137, | |
| "grad_norm": 2.5212690830230713, | |
| "learning_rate": 1.3639344262295084e-05, | |
| "loss": 0.175, | |
| "step": 28520 | |
| }, | |
| { | |
| "epoch": 139.30445393532642, | |
| "grad_norm": 4.492109298706055, | |
| "learning_rate": 1.3508196721311475e-05, | |
| "loss": 0.1838, | |
| "step": 28540 | |
| }, | |
| { | |
| "epoch": 139.40207443563148, | |
| "grad_norm": 3.0226120948791504, | |
| "learning_rate": 1.337704918032787e-05, | |
| "loss": 0.1753, | |
| "step": 28560 | |
| }, | |
| { | |
| "epoch": 139.49969493593656, | |
| "grad_norm": 2.4843361377716064, | |
| "learning_rate": 1.3245901639344262e-05, | |
| "loss": 0.1915, | |
| "step": 28580 | |
| }, | |
| { | |
| "epoch": 139.59731543624162, | |
| "grad_norm": 3.4304590225219727, | |
| "learning_rate": 1.3114754098360657e-05, | |
| "loss": 0.1703, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 139.69493593654667, | |
| "grad_norm": 3.242751121520996, | |
| "learning_rate": 1.2983606557377051e-05, | |
| "loss": 0.176, | |
| "step": 28620 | |
| }, | |
| { | |
| "epoch": 139.79255643685173, | |
| "grad_norm": 3.2713284492492676, | |
| "learning_rate": 1.2852459016393442e-05, | |
| "loss": 0.1838, | |
| "step": 28640 | |
| }, | |
| { | |
| "epoch": 139.8901769371568, | |
| "grad_norm": 2.9921929836273193, | |
| "learning_rate": 1.2721311475409837e-05, | |
| "loss": 0.1764, | |
| "step": 28660 | |
| }, | |
| { | |
| "epoch": 139.98779743746186, | |
| "grad_norm": 3.6933350563049316, | |
| "learning_rate": 1.259016393442623e-05, | |
| "loss": 0.1803, | |
| "step": 28680 | |
| }, | |
| { | |
| "epoch": 140.08541793776692, | |
| "grad_norm": 2.947892665863037, | |
| "learning_rate": 1.2459016393442624e-05, | |
| "loss": 0.1732, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 140.183038438072, | |
| "grad_norm": 3.5047738552093506, | |
| "learning_rate": 1.2327868852459017e-05, | |
| "loss": 0.1751, | |
| "step": 28720 | |
| }, | |
| { | |
| "epoch": 140.28065893837706, | |
| "grad_norm": 2.9315600395202637, | |
| "learning_rate": 1.2196721311475411e-05, | |
| "loss": 0.1879, | |
| "step": 28740 | |
| }, | |
| { | |
| "epoch": 140.3782794386821, | |
| "grad_norm": 3.6878082752227783, | |
| "learning_rate": 1.2065573770491804e-05, | |
| "loss": 0.1551, | |
| "step": 28760 | |
| }, | |
| { | |
| "epoch": 140.4758999389872, | |
| "grad_norm": 3.2162342071533203, | |
| "learning_rate": 1.1934426229508197e-05, | |
| "loss": 0.183, | |
| "step": 28780 | |
| }, | |
| { | |
| "epoch": 140.57352043929225, | |
| "grad_norm": 3.6583456993103027, | |
| "learning_rate": 1.1803278688524591e-05, | |
| "loss": 0.1784, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 140.6711409395973, | |
| "grad_norm": 2.780412435531616, | |
| "learning_rate": 1.1672131147540984e-05, | |
| "loss": 0.1885, | |
| "step": 28820 | |
| }, | |
| { | |
| "epoch": 140.7687614399024, | |
| "grad_norm": 2.3773672580718994, | |
| "learning_rate": 1.1540983606557378e-05, | |
| "loss": 0.1611, | |
| "step": 28840 | |
| }, | |
| { | |
| "epoch": 140.86638194020745, | |
| "grad_norm": 3.6276142597198486, | |
| "learning_rate": 1.1409836065573771e-05, | |
| "loss": 0.1806, | |
| "step": 28860 | |
| }, | |
| { | |
| "epoch": 140.9640024405125, | |
| "grad_norm": 3.3313121795654297, | |
| "learning_rate": 1.1278688524590164e-05, | |
| "loss": 0.1741, | |
| "step": 28880 | |
| }, | |
| { | |
| "epoch": 141.06162294081759, | |
| "grad_norm": 3.1109941005706787, | |
| "learning_rate": 1.1147540983606558e-05, | |
| "loss": 0.1747, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 141.15924344112264, | |
| "grad_norm": 2.3083291053771973, | |
| "learning_rate": 1.1016393442622953e-05, | |
| "loss": 0.16, | |
| "step": 28920 | |
| }, | |
| { | |
| "epoch": 141.2568639414277, | |
| "grad_norm": 3.6427536010742188, | |
| "learning_rate": 1.0885245901639344e-05, | |
| "loss": 0.1792, | |
| "step": 28940 | |
| }, | |
| { | |
| "epoch": 141.35448444173275, | |
| "grad_norm": 3.2836146354675293, | |
| "learning_rate": 1.0754098360655738e-05, | |
| "loss": 0.1921, | |
| "step": 28960 | |
| }, | |
| { | |
| "epoch": 141.45210494203783, | |
| "grad_norm": 3.712411642074585, | |
| "learning_rate": 1.0622950819672131e-05, | |
| "loss": 0.1834, | |
| "step": 28980 | |
| }, | |
| { | |
| "epoch": 141.5497254423429, | |
| "grad_norm": 2.6064016819000244, | |
| "learning_rate": 1.0491803278688525e-05, | |
| "loss": 0.1975, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 141.64734594264795, | |
| "grad_norm": 3.3907470703125, | |
| "learning_rate": 1.0360655737704918e-05, | |
| "loss": 0.172, | |
| "step": 29020 | |
| }, | |
| { | |
| "epoch": 141.74496644295303, | |
| "grad_norm": 3.0713050365448, | |
| "learning_rate": 1.0229508196721311e-05, | |
| "loss": 0.1547, | |
| "step": 29040 | |
| }, | |
| { | |
| "epoch": 141.84258694325808, | |
| "grad_norm": 5.218588352203369, | |
| "learning_rate": 1.0098360655737705e-05, | |
| "loss": 0.1712, | |
| "step": 29060 | |
| }, | |
| { | |
| "epoch": 141.94020744356314, | |
| "grad_norm": 3.205132007598877, | |
| "learning_rate": 9.9672131147541e-06, | |
| "loss": 0.1629, | |
| "step": 29080 | |
| }, | |
| { | |
| "epoch": 142.03782794386822, | |
| "grad_norm": 3.580003499984741, | |
| "learning_rate": 9.836065573770493e-06, | |
| "loss": 0.1877, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 142.13544844417328, | |
| "grad_norm": 3.393789529800415, | |
| "learning_rate": 9.704918032786885e-06, | |
| "loss": 0.1928, | |
| "step": 29120 | |
| }, | |
| { | |
| "epoch": 142.23306894447833, | |
| "grad_norm": 2.592445135116577, | |
| "learning_rate": 9.573770491803278e-06, | |
| "loss": 0.1638, | |
| "step": 29140 | |
| }, | |
| { | |
| "epoch": 142.33068944478342, | |
| "grad_norm": 4.577868938446045, | |
| "learning_rate": 9.442622950819673e-06, | |
| "loss": 0.1868, | |
| "step": 29160 | |
| }, | |
| { | |
| "epoch": 142.42830994508847, | |
| "grad_norm": 3.1620144844055176, | |
| "learning_rate": 9.311475409836067e-06, | |
| "loss": 0.1635, | |
| "step": 29180 | |
| }, | |
| { | |
| "epoch": 142.52593044539353, | |
| "grad_norm": 3.382749319076538, | |
| "learning_rate": 9.18032786885246e-06, | |
| "loss": 0.1469, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 142.6235509456986, | |
| "grad_norm": 3.314983606338501, | |
| "learning_rate": 9.049180327868853e-06, | |
| "loss": 0.1719, | |
| "step": 29220 | |
| }, | |
| { | |
| "epoch": 142.72117144600367, | |
| "grad_norm": 2.643578290939331, | |
| "learning_rate": 8.918032786885247e-06, | |
| "loss": 0.1587, | |
| "step": 29240 | |
| }, | |
| { | |
| "epoch": 142.81879194630872, | |
| "grad_norm": 2.4660592079162598, | |
| "learning_rate": 8.78688524590164e-06, | |
| "loss": 0.2031, | |
| "step": 29260 | |
| }, | |
| { | |
| "epoch": 142.91641244661378, | |
| "grad_norm": 3.2867209911346436, | |
| "learning_rate": 8.655737704918034e-06, | |
| "loss": 0.1679, | |
| "step": 29280 | |
| }, | |
| { | |
| "epoch": 143.01403294691886, | |
| "grad_norm": 3.2089104652404785, | |
| "learning_rate": 8.524590163934427e-06, | |
| "loss": 0.171, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 143.11165344722392, | |
| "grad_norm": 2.9183740615844727, | |
| "learning_rate": 8.39344262295082e-06, | |
| "loss": 0.1642, | |
| "step": 29320 | |
| }, | |
| { | |
| "epoch": 143.20927394752897, | |
| "grad_norm": 2.9158482551574707, | |
| "learning_rate": 8.262295081967214e-06, | |
| "loss": 0.1743, | |
| "step": 29340 | |
| }, | |
| { | |
| "epoch": 143.30689444783405, | |
| "grad_norm": 3.256065607070923, | |
| "learning_rate": 8.131147540983607e-06, | |
| "loss": 0.1816, | |
| "step": 29360 | |
| }, | |
| { | |
| "epoch": 143.4045149481391, | |
| "grad_norm": 2.916098117828369, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.1575, | |
| "step": 29380 | |
| }, | |
| { | |
| "epoch": 143.50213544844416, | |
| "grad_norm": 3.414485454559326, | |
| "learning_rate": 7.868852459016394e-06, | |
| "loss": 0.1584, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 143.59975594874925, | |
| "grad_norm": 3.5028018951416016, | |
| "learning_rate": 7.737704918032787e-06, | |
| "loss": 0.1731, | |
| "step": 29420 | |
| }, | |
| { | |
| "epoch": 143.6973764490543, | |
| "grad_norm": 2.7573652267456055, | |
| "learning_rate": 7.606557377049181e-06, | |
| "loss": 0.1848, | |
| "step": 29440 | |
| }, | |
| { | |
| "epoch": 143.79499694935936, | |
| "grad_norm": 3.1906893253326416, | |
| "learning_rate": 7.475409836065573e-06, | |
| "loss": 0.1842, | |
| "step": 29460 | |
| }, | |
| { | |
| "epoch": 143.89261744966444, | |
| "grad_norm": 2.384742021560669, | |
| "learning_rate": 7.344262295081968e-06, | |
| "loss": 0.1852, | |
| "step": 29480 | |
| }, | |
| { | |
| "epoch": 143.9902379499695, | |
| "grad_norm": 3.7481918334960938, | |
| "learning_rate": 7.213114754098361e-06, | |
| "loss": 0.1794, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 144.08785845027455, | |
| "grad_norm": 2.903989791870117, | |
| "learning_rate": 7.081967213114754e-06, | |
| "loss": 0.1745, | |
| "step": 29520 | |
| }, | |
| { | |
| "epoch": 144.1854789505796, | |
| "grad_norm": 3.4449713230133057, | |
| "learning_rate": 6.9508196721311484e-06, | |
| "loss": 0.1874, | |
| "step": 29540 | |
| }, | |
| { | |
| "epoch": 144.2830994508847, | |
| "grad_norm": 2.9290127754211426, | |
| "learning_rate": 6.819672131147542e-06, | |
| "loss": 0.1736, | |
| "step": 29560 | |
| }, | |
| { | |
| "epoch": 144.38071995118975, | |
| "grad_norm": 2.805908203125, | |
| "learning_rate": 6.688524590163935e-06, | |
| "loss": 0.187, | |
| "step": 29580 | |
| }, | |
| { | |
| "epoch": 144.4783404514948, | |
| "grad_norm": 2.9539241790771484, | |
| "learning_rate": 6.557377049180328e-06, | |
| "loss": 0.1856, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 144.57596095179989, | |
| "grad_norm": 2.8198204040527344, | |
| "learning_rate": 6.426229508196721e-06, | |
| "loss": 0.1733, | |
| "step": 29620 | |
| }, | |
| { | |
| "epoch": 144.67358145210494, | |
| "grad_norm": 3.3926849365234375, | |
| "learning_rate": 6.295081967213115e-06, | |
| "loss": 0.18, | |
| "step": 29640 | |
| }, | |
| { | |
| "epoch": 144.77120195241, | |
| "grad_norm": 4.100579261779785, | |
| "learning_rate": 6.163934426229508e-06, | |
| "loss": 0.1568, | |
| "step": 29660 | |
| }, | |
| { | |
| "epoch": 144.86882245271508, | |
| "grad_norm": 3.2875492572784424, | |
| "learning_rate": 6.032786885245902e-06, | |
| "loss": 0.1561, | |
| "step": 29680 | |
| }, | |
| { | |
| "epoch": 144.96644295302013, | |
| "grad_norm": 2.626185417175293, | |
| "learning_rate": 5.9016393442622956e-06, | |
| "loss": 0.1714, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 145.0640634533252, | |
| "grad_norm": 4.3447265625, | |
| "learning_rate": 5.770491803278689e-06, | |
| "loss": 0.1951, | |
| "step": 29720 | |
| }, | |
| { | |
| "epoch": 145.16168395363027, | |
| "grad_norm": 4.056821346282959, | |
| "learning_rate": 5.639344262295082e-06, | |
| "loss": 0.1621, | |
| "step": 29740 | |
| }, | |
| { | |
| "epoch": 145.25930445393533, | |
| "grad_norm": 3.4116666316986084, | |
| "learning_rate": 5.508196721311476e-06, | |
| "loss": 0.179, | |
| "step": 29760 | |
| }, | |
| { | |
| "epoch": 145.35692495424038, | |
| "grad_norm": 2.810452699661255, | |
| "learning_rate": 5.377049180327869e-06, | |
| "loss": 0.1723, | |
| "step": 29780 | |
| }, | |
| { | |
| "epoch": 145.45454545454547, | |
| "grad_norm": 3.708115816116333, | |
| "learning_rate": 5.245901639344263e-06, | |
| "loss": 0.1675, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 145.55216595485052, | |
| "grad_norm": 4.000546455383301, | |
| "learning_rate": 5.1147540983606555e-06, | |
| "loss": 0.1716, | |
| "step": 29820 | |
| }, | |
| { | |
| "epoch": 145.64978645515558, | |
| "grad_norm": 3.2421109676361084, | |
| "learning_rate": 4.98360655737705e-06, | |
| "loss": 0.1656, | |
| "step": 29840 | |
| }, | |
| { | |
| "epoch": 145.74740695546063, | |
| "grad_norm": 3.13706111907959, | |
| "learning_rate": 4.852459016393443e-06, | |
| "loss": 0.1822, | |
| "step": 29860 | |
| }, | |
| { | |
| "epoch": 145.84502745576572, | |
| "grad_norm": 3.364842176437378, | |
| "learning_rate": 4.721311475409836e-06, | |
| "loss": 0.1772, | |
| "step": 29880 | |
| }, | |
| { | |
| "epoch": 145.94264795607077, | |
| "grad_norm": 3.2013063430786133, | |
| "learning_rate": 4.59016393442623e-06, | |
| "loss": 0.1717, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 146.04026845637583, | |
| "grad_norm": 4.174123287200928, | |
| "learning_rate": 4.4590163934426235e-06, | |
| "loss": 0.1731, | |
| "step": 29920 | |
| }, | |
| { | |
| "epoch": 146.1378889566809, | |
| "grad_norm": 2.8885281085968018, | |
| "learning_rate": 4.327868852459017e-06, | |
| "loss": 0.1421, | |
| "step": 29940 | |
| }, | |
| { | |
| "epoch": 146.23550945698597, | |
| "grad_norm": 2.6078240871429443, | |
| "learning_rate": 4.19672131147541e-06, | |
| "loss": 0.1898, | |
| "step": 29960 | |
| }, | |
| { | |
| "epoch": 146.33312995729102, | |
| "grad_norm": 4.43600606918335, | |
| "learning_rate": 4.0655737704918034e-06, | |
| "loss": 0.1787, | |
| "step": 29980 | |
| }, | |
| { | |
| "epoch": 146.4307504575961, | |
| "grad_norm": 2.9905123710632324, | |
| "learning_rate": 3.934426229508197e-06, | |
| "loss": 0.1866, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 146.52837095790116, | |
| "grad_norm": 2.8058741092681885, | |
| "learning_rate": 3.8032786885245906e-06, | |
| "loss": 0.1751, | |
| "step": 30020 | |
| }, | |
| { | |
| "epoch": 146.62599145820622, | |
| "grad_norm": 2.752978563308716, | |
| "learning_rate": 3.672131147540984e-06, | |
| "loss": 0.1826, | |
| "step": 30040 | |
| }, | |
| { | |
| "epoch": 146.7236119585113, | |
| "grad_norm": 3.0315961837768555, | |
| "learning_rate": 3.540983606557377e-06, | |
| "loss": 0.1623, | |
| "step": 30060 | |
| }, | |
| { | |
| "epoch": 146.82123245881635, | |
| "grad_norm": 3.3782765865325928, | |
| "learning_rate": 3.409836065573771e-06, | |
| "loss": 0.1784, | |
| "step": 30080 | |
| }, | |
| { | |
| "epoch": 146.9188529591214, | |
| "grad_norm": 2.961002826690674, | |
| "learning_rate": 3.278688524590164e-06, | |
| "loss": 0.1764, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 147.0164734594265, | |
| "grad_norm": 3.775484561920166, | |
| "learning_rate": 3.1475409836065574e-06, | |
| "loss": 0.1595, | |
| "step": 30120 | |
| }, | |
| { | |
| "epoch": 147.11409395973155, | |
| "grad_norm": 4.18531608581543, | |
| "learning_rate": 3.016393442622951e-06, | |
| "loss": 0.1599, | |
| "step": 30140 | |
| }, | |
| { | |
| "epoch": 147.2117144600366, | |
| "grad_norm": 2.9723432064056396, | |
| "learning_rate": 2.8852459016393446e-06, | |
| "loss": 0.159, | |
| "step": 30160 | |
| }, | |
| { | |
| "epoch": 147.30933496034166, | |
| "grad_norm": 3.2833070755004883, | |
| "learning_rate": 2.754098360655738e-06, | |
| "loss": 0.1657, | |
| "step": 30180 | |
| }, | |
| { | |
| "epoch": 147.40695546064674, | |
| "grad_norm": 3.4174959659576416, | |
| "learning_rate": 2.6229508196721314e-06, | |
| "loss": 0.175, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 147.5045759609518, | |
| "grad_norm": 3.3127195835113525, | |
| "learning_rate": 2.491803278688525e-06, | |
| "loss": 0.1815, | |
| "step": 30220 | |
| }, | |
| { | |
| "epoch": 147.60219646125685, | |
| "grad_norm": 3.7137949466705322, | |
| "learning_rate": 2.360655737704918e-06, | |
| "loss": 0.198, | |
| "step": 30240 | |
| }, | |
| { | |
| "epoch": 147.69981696156194, | |
| "grad_norm": 2.630924701690674, | |
| "learning_rate": 2.2295081967213117e-06, | |
| "loss": 0.1687, | |
| "step": 30260 | |
| }, | |
| { | |
| "epoch": 147.797437461867, | |
| "grad_norm": 3.330245018005371, | |
| "learning_rate": 2.098360655737705e-06, | |
| "loss": 0.1581, | |
| "step": 30280 | |
| }, | |
| { | |
| "epoch": 147.89505796217205, | |
| "grad_norm": 3.237410068511963, | |
| "learning_rate": 1.9672131147540985e-06, | |
| "loss": 0.2022, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 147.99267846247713, | |
| "grad_norm": 2.633331537246704, | |
| "learning_rate": 1.836065573770492e-06, | |
| "loss": 0.1613, | |
| "step": 30320 | |
| }, | |
| { | |
| "epoch": 148.09029896278219, | |
| "grad_norm": 2.527902603149414, | |
| "learning_rate": 1.7049180327868855e-06, | |
| "loss": 0.1749, | |
| "step": 30340 | |
| }, | |
| { | |
| "epoch": 148.18791946308724, | |
| "grad_norm": 2.9230234622955322, | |
| "learning_rate": 1.5737704918032787e-06, | |
| "loss": 0.1464, | |
| "step": 30360 | |
| }, | |
| { | |
| "epoch": 148.28553996339232, | |
| "grad_norm": 2.591038703918457, | |
| "learning_rate": 1.4426229508196723e-06, | |
| "loss": 0.1819, | |
| "step": 30380 | |
| }, | |
| { | |
| "epoch": 148.38316046369738, | |
| "grad_norm": 3.6826913356781006, | |
| "learning_rate": 1.3114754098360657e-06, | |
| "loss": 0.1909, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 148.48078096400243, | |
| "grad_norm": 3.1828205585479736, | |
| "learning_rate": 1.180327868852459e-06, | |
| "loss": 0.1727, | |
| "step": 30420 | |
| }, | |
| { | |
| "epoch": 148.57840146430752, | |
| "grad_norm": 3.3356974124908447, | |
| "learning_rate": 1.0491803278688525e-06, | |
| "loss": 0.1624, | |
| "step": 30440 | |
| }, | |
| { | |
| "epoch": 148.67602196461257, | |
| "grad_norm": 3.1692721843719482, | |
| "learning_rate": 9.18032786885246e-07, | |
| "loss": 0.1769, | |
| "step": 30460 | |
| }, | |
| { | |
| "epoch": 148.77364246491763, | |
| "grad_norm": 2.968018054962158, | |
| "learning_rate": 7.868852459016393e-07, | |
| "loss": 0.1594, | |
| "step": 30480 | |
| }, | |
| { | |
| "epoch": 148.87126296522268, | |
| "grad_norm": 3.693136692047119, | |
| "learning_rate": 6.557377049180328e-07, | |
| "loss": 0.1927, | |
| "step": 30500 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 30600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 150, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.924112697660375e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |