{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998981773750127,
  "eval_steps": 500,
  "global_step": 2455,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004072904999490887,
      "grad_norm": 0.7094523906707764,
      "learning_rate": 2.702702702702703e-05,
      "loss": 1.8961,
      "mean_token_accuracy": 0.5438283555209636,
      "step": 10
    },
    {
      "epoch": 0.008145809998981774,
      "grad_norm": 0.4965726137161255,
      "learning_rate": 5.405405405405406e-05,
      "loss": 2.0135,
      "mean_token_accuracy": 0.5206024497747421,
      "step": 20
    },
    {
      "epoch": 0.01221871499847266,
      "grad_norm": 0.5204955339431763,
      "learning_rate": 8.108108108108109e-05,
      "loss": 1.7338,
      "mean_token_accuracy": 0.5830004885792732,
      "step": 30
    },
    {
      "epoch": 0.01629161999796355,
      "grad_norm": 0.4678299129009247,
      "learning_rate": 0.00010810810810810812,
      "loss": 1.7561,
      "mean_token_accuracy": 0.5730855345726014,
      "step": 40
    },
    {
      "epoch": 0.020364524997454433,
      "grad_norm": 0.439376562833786,
      "learning_rate": 0.00013513513513513514,
      "loss": 1.7277,
      "mean_token_accuracy": 0.5785414174199104,
      "step": 50
    },
    {
      "epoch": 0.02443742999694532,
      "grad_norm": 0.5652154684066772,
      "learning_rate": 0.00016216216216216218,
      "loss": 1.5663,
      "mean_token_accuracy": 0.6036677993834019,
      "step": 60
    },
    {
      "epoch": 0.02851033499643621,
      "grad_norm": 0.5163070559501648,
      "learning_rate": 0.0001891891891891892,
      "loss": 1.8259,
      "mean_token_accuracy": 0.5530456639826298,
      "step": 70
    },
    {
      "epoch": 0.0325832399959271,
      "grad_norm": 0.41974571347236633,
      "learning_rate": 0.00019999686634492516,
      "loss": 1.6554,
      "mean_token_accuracy": 0.6073097884654999,
      "step": 80
    },
    {
      "epoch": 0.03665614499541798,
      "grad_norm": 0.5097134709358215,
      "learning_rate": 0.00019997771694180204,
      "loss": 1.7208,
      "mean_token_accuracy": 0.5835812106728554,
      "step": 90
    },
    {
      "epoch": 0.04072904999490887,
      "grad_norm": 0.3469955623149872,
      "learning_rate": 0.00019994116238472668,
      "loss": 1.7954,
      "mean_token_accuracy": 0.5794057920575142,
      "step": 100
    },
    {
      "epoch": 0.044801954994399755,
      "grad_norm": 0.5898286700248718,
      "learning_rate": 0.0001998872090374941,
      "loss": 1.8089,
      "mean_token_accuracy": 0.5614037178456783,
      "step": 110
    },
    {
      "epoch": 0.04887485999389064,
      "grad_norm": 0.3150334656238556,
      "learning_rate": 0.0001998158662928604,
      "loss": 1.5827,
      "mean_token_accuracy": 0.5801003761589527,
      "step": 120
    },
    {
      "epoch": 0.05294776499338153,
      "grad_norm": 0.3100312352180481,
      "learning_rate": 0.00019972714657090772,
      "loss": 1.6712,
      "mean_token_accuracy": 0.5900266923010349,
      "step": 130
    },
    {
      "epoch": 0.05702066999287242,
      "grad_norm": 0.30420345067977905,
      "learning_rate": 0.0001996210653168819,
      "loss": 1.646,
      "mean_token_accuracy": 0.5839473098516464,
      "step": 140
    },
    {
      "epoch": 0.0610935749923633,
      "grad_norm": 0.454593688249588,
      "learning_rate": 0.0001994976409985037,
      "loss": 1.7184,
      "mean_token_accuracy": 0.566600239276886,
      "step": 150
    },
    {
      "epoch": 0.0651664799918542,
      "grad_norm": 0.35688647627830505,
      "learning_rate": 0.0001993568951027537,
      "loss": 1.6766,
      "mean_token_accuracy": 0.5824202686548233,
      "step": 160
    },
    {
      "epoch": 0.06923938499134508,
      "grad_norm": 0.3199939727783203,
      "learning_rate": 0.0001991988521321317,
      "loss": 1.542,
      "mean_token_accuracy": 0.6095141984522343,
      "step": 170
    },
    {
      "epoch": 0.07331228999083596,
      "grad_norm": 0.5523242950439453,
      "learning_rate": 0.00019902353960039087,
      "loss": 1.7218,
      "mean_token_accuracy": 0.5745485998690129,
      "step": 180
    },
    {
      "epoch": 0.07738519499032685,
      "grad_norm": 0.3872721493244171,
      "learning_rate": 0.00019883098802774812,
      "loss": 1.7306,
      "mean_token_accuracy": 0.5514535710215569,
      "step": 190
    },
    {
      "epoch": 0.08145809998981773,
      "grad_norm": 0.2679811120033264,
      "learning_rate": 0.0001986212309355707,
      "loss": 1.6524,
      "mean_token_accuracy": 0.5822832569479942,
      "step": 200
    },
    {
      "epoch": 0.08553100498930863,
      "grad_norm": 0.5167363882064819,
      "learning_rate": 0.00019839430484054046,
      "loss": 1.6964,
      "mean_token_accuracy": 0.573430598527193,
      "step": 210
    },
    {
      "epoch": 0.08960390998879951,
      "grad_norm": 0.4363399147987366,
      "learning_rate": 0.0001981502492482967,
      "loss": 1.7296,
      "mean_token_accuracy": 0.5835007324814796,
      "step": 220
    },
    {
      "epoch": 0.0936768149882904,
      "grad_norm": 0.4052150845527649,
      "learning_rate": 0.00019788910664655848,
      "loss": 1.5772,
      "mean_token_accuracy": 0.5771500714123249,
      "step": 230
    },
    {
      "epoch": 0.09774971998778129,
      "grad_norm": 0.34224212169647217,
      "learning_rate": 0.0001976109224977281,
      "loss": 1.6263,
      "mean_token_accuracy": 0.5942870646715164,
      "step": 240
    },
    {
      "epoch": 0.10182262498727217,
      "grad_norm": 0.4852873980998993,
      "learning_rate": 0.00019731574523097647,
      "loss": 1.5731,
      "mean_token_accuracy": 0.5886094763875007,
      "step": 250
    },
    {
      "epoch": 0.10589552998676306,
      "grad_norm": 0.30241233110427856,
      "learning_rate": 0.00019700362623381197,
      "loss": 1.8311,
      "mean_token_accuracy": 0.5616028495132923,
      "step": 260
    },
    {
      "epoch": 0.10996843498625394,
      "grad_norm": 0.38147303462028503,
      "learning_rate": 0.00019667461984313448,
      "loss": 1.652,
      "mean_token_accuracy": 0.5836799181997776,
      "step": 270
    },
    {
      "epoch": 0.11404133998574484,
      "grad_norm": 0.3174324333667755,
      "learning_rate": 0.00019632878333577592,
      "loss": 1.6831,
      "mean_token_accuracy": 0.5850063987076283,
      "step": 280
    },
    {
      "epoch": 0.11811424498523572,
      "grad_norm": 0.350323349237442,
      "learning_rate": 0.00019596617691852863,
      "loss": 1.6644,
      "mean_token_accuracy": 0.5841067053377629,
      "step": 290
    },
    {
      "epoch": 0.1221871499847266,
      "grad_norm": 0.30346542596817017,
      "learning_rate": 0.0001955868637176643,
      "loss": 1.656,
      "mean_token_accuracy": 0.584677055478096,
      "step": 300
    },
    {
      "epoch": 0.1262600549842175,
      "grad_norm": 0.2639765739440918,
      "learning_rate": 0.00019519090976794406,
      "loss": 1.7454,
      "mean_token_accuracy": 0.5678185373544693,
      "step": 310
    },
    {
      "epoch": 0.1303329599837084,
      "grad_norm": 0.3039887547492981,
      "learning_rate": 0.00019477838400112254,
      "loss": 1.754,
      "mean_token_accuracy": 0.5744720883667469,
      "step": 320
    },
    {
      "epoch": 0.13440586498319926,
      "grad_norm": 0.35102295875549316,
      "learning_rate": 0.00019434935823394746,
      "loss": 1.6665,
      "mean_token_accuracy": 0.5876846723258495,
      "step": 330
    },
    {
      "epoch": 0.13847876998269015,
      "grad_norm": 0.3325759470462799,
      "learning_rate": 0.00019390390715565725,
      "loss": 1.6773,
      "mean_token_accuracy": 0.5869172632694244,
      "step": 340
    },
    {
      "epoch": 0.14255167498218105,
      "grad_norm": 0.37209993600845337,
      "learning_rate": 0.000193442108314978,
      "loss": 1.6328,
      "mean_token_accuracy": 0.5927142709493637,
      "step": 350
    },
    {
      "epoch": 0.14662457998167192,
      "grad_norm": 0.2964717149734497,
      "learning_rate": 0.00019296404210662331,
      "loss": 1.5659,
      "mean_token_accuracy": 0.60322862342,
      "step": 360
    },
    {
      "epoch": 0.1506974849811628,
      "grad_norm": 0.29879456758499146,
      "learning_rate": 0.00019246979175729822,
      "loss": 1.7083,
      "mean_token_accuracy": 0.5893984287977219,
      "step": 370
    },
    {
      "epoch": 0.1547703899806537,
      "grad_norm": 0.3726056218147278,
      "learning_rate": 0.00019195944331121015,
      "loss": 1.6854,
      "mean_token_accuracy": 0.5761750474572181,
      "step": 380
    },
    {
      "epoch": 0.1588432949801446,
      "grad_norm": 0.517816424369812,
      "learning_rate": 0.0001914330856150897,
      "loss": 1.7282,
      "mean_token_accuracy": 0.5854727104306221,
      "step": 390
    },
    {
      "epoch": 0.16291619997963547,
      "grad_norm": 0.25848233699798584,
      "learning_rate": 0.00019089081030272296,
      "loss": 1.5562,
      "mean_token_accuracy": 0.6038706839084625,
      "step": 400
    },
    {
      "epoch": 0.16698910497912636,
      "grad_norm": 0.3191607892513275,
      "learning_rate": 0.00019033271177899922,
      "loss": 1.6452,
      "mean_token_accuracy": 0.5812859788537026,
      "step": 410
    },
    {
      "epoch": 0.17106200997861726,
      "grad_norm": 0.3990655243396759,
      "learning_rate": 0.0001897588872034758,
      "loss": 1.626,
      "mean_token_accuracy": 0.569889971613884,
      "step": 420
    },
    {
      "epoch": 0.17513491497810812,
      "grad_norm": 0.346086323261261,
      "learning_rate": 0.00018916943647346375,
      "loss": 1.7451,
      "mean_token_accuracy": 0.578500047326088,
      "step": 430
    },
    {
      "epoch": 0.17920781997759902,
      "grad_norm": 0.36437422037124634,
      "learning_rate": 0.0001885644622066364,
      "loss": 1.846,
      "mean_token_accuracy": 0.5627885892987251,
      "step": 440
    },
    {
      "epoch": 0.18328072497708991,
      "grad_norm": 0.2968160808086395,
      "learning_rate": 0.00018794406972316482,
      "loss": 1.671,
      "mean_token_accuracy": 0.5769762165844441,
      "step": 450
    },
    {
      "epoch": 0.1873536299765808,
      "grad_norm": 0.2781198024749756,
      "learning_rate": 0.00018730836702738257,
      "loss": 1.4983,
      "mean_token_accuracy": 0.613883113116026,
      "step": 460
    },
    {
      "epoch": 0.19142653497607168,
      "grad_norm": 0.4645621180534363,
      "learning_rate": 0.0001866574647889831,
      "loss": 1.6776,
      "mean_token_accuracy": 0.5890260674059391,
      "step": 470
    },
    {
      "epoch": 0.19549943997556257,
      "grad_norm": 0.3920878767967224,
      "learning_rate": 0.00018599147632375332,
      "loss": 1.802,
      "mean_token_accuracy": 0.568213502317667,
      "step": 480
    },
    {
      "epoch": 0.19957234497505347,
      "grad_norm": 0.3473225235939026,
      "learning_rate": 0.00018531051757384633,
      "loss": 1.7161,
      "mean_token_accuracy": 0.5727271348237991,
      "step": 490
    },
    {
      "epoch": 0.20364524997454433,
      "grad_norm": 0.30091673135757446,
      "learning_rate": 0.00018461470708759712,
      "loss": 1.7042,
      "mean_token_accuracy": 0.5763454169034958,
      "step": 500
    },
    {
      "epoch": 0.20771815497403523,
      "grad_norm": 0.31175661087036133,
      "learning_rate": 0.00018390416599888435,
      "loss": 1.689,
      "mean_token_accuracy": 0.5796464458107948,
      "step": 510
    },
    {
      "epoch": 0.21179105997352612,
      "grad_norm": 0.3624255955219269,
      "learning_rate": 0.0001831790180060422,
      "loss": 1.5619,
      "mean_token_accuracy": 0.6015763126313687,
      "step": 520
    },
    {
      "epoch": 0.215863964973017,
      "grad_norm": 0.2667541205883026,
      "learning_rate": 0.00018243938935032561,
      "loss": 1.6877,
      "mean_token_accuracy": 0.5839527539908886,
      "step": 530
    },
    {
      "epoch": 0.2199368699725079,
      "grad_norm": 0.31019967794418335,
      "learning_rate": 0.00018168540879393296,
      "loss": 1.7831,
      "mean_token_accuracy": 0.5688935197889805,
      "step": 540
    },
    {
      "epoch": 0.22400977497199878,
      "grad_norm": 0.2726418673992157,
      "learning_rate": 0.0001809172075975897,
      "loss": 1.7288,
      "mean_token_accuracy": 0.5798229008913041,
      "step": 550
    },
    {
      "epoch": 0.22808267997148968,
      "grad_norm": 0.29514381289482117,
      "learning_rate": 0.00018013491949769734,
      "loss": 1.7188,
      "mean_token_accuracy": 0.5756009854376316,
      "step": 560
    },
    {
      "epoch": 0.23215558497098054,
      "grad_norm": 0.37964069843292236,
      "learning_rate": 0.00017933868068305104,
      "loss": 1.6244,
      "mean_token_accuracy": 0.5932842157781124,
      "step": 570
    },
    {
      "epoch": 0.23622848997047144,
      "grad_norm": 0.300620436668396,
      "learning_rate": 0.0001785286297711305,
      "loss": 1.5565,
      "mean_token_accuracy": 0.5965760670602321,
      "step": 580
    },
    {
      "epoch": 0.24030139496996233,
      "grad_norm": 0.5466737151145935,
      "learning_rate": 0.00017770490778396808,
      "loss": 1.6532,
      "mean_token_accuracy": 0.5821332208812237,
      "step": 590
    },
    {
      "epoch": 0.2443742999694532,
      "grad_norm": 0.3445660173892975,
      "learning_rate": 0.00017686765812359808,
      "loss": 1.7585,
      "mean_token_accuracy": 0.5790032669901848,
      "step": 600
    },
    {
      "epoch": 0.2484472049689441,
      "grad_norm": 0.3492606282234192,
      "learning_rate": 0.0001760170265470921,
      "loss": 1.6401,
      "mean_token_accuracy": 0.6002471588551999,
      "step": 610
    },
    {
      "epoch": 0.252520109968435,
      "grad_norm": 0.31294527649879456,
      "learning_rate": 0.00017515316114118375,
      "loss": 1.6915,
      "mean_token_accuracy": 0.5570432722568512,
      "step": 620
    },
    {
      "epoch": 0.25659301496792586,
      "grad_norm": 0.27257561683654785,
      "learning_rate": 0.00017427621229648853,
      "loss": 1.5666,
      "mean_token_accuracy": 0.6028999522328377,
      "step": 630
    },
    {
      "epoch": 0.2606659199674168,
      "grad_norm": 0.29818692803382874,
      "learning_rate": 0.00017338633268132212,
      "loss": 1.5926,
      "mean_token_accuracy": 0.5965964362025261,
      "step": 640
    },
    {
      "epoch": 0.26473882496690765,
      "grad_norm": 0.49210649728775024,
      "learning_rate": 0.0001724836772151223,
      "loss": 1.5925,
      "mean_token_accuracy": 0.5952631443738937,
      "step": 650
    },
    {
      "epoch": 0.2688117299663985,
      "grad_norm": 0.3807302713394165,
      "learning_rate": 0.00017156840304147902,
      "loss": 1.6237,
      "mean_token_accuracy": 0.5884141281247139,
      "step": 660
    },
    {
      "epoch": 0.27288463496588944,
      "grad_norm": 0.2621661126613617,
      "learning_rate": 0.00017064066950077722,
      "loss": 1.7356,
      "mean_token_accuracy": 0.5827617473900318,
      "step": 670
    },
    {
      "epoch": 0.2769575399653803,
      "grad_norm": 0.30957838892936707,
      "learning_rate": 0.00016970063810245716,
      "loss": 1.5585,
      "mean_token_accuracy": 0.5888052701950073,
      "step": 680
    },
    {
      "epoch": 0.2810304449648712,
      "grad_norm": 0.2501460611820221,
      "learning_rate": 0.00016874847249689722,
      "loss": 1.5913,
      "mean_token_accuracy": 0.5886548452079297,
      "step": 690
    },
    {
      "epoch": 0.2851033499643621,
      "grad_norm": 0.3207811415195465,
      "learning_rate": 0.00016778433844692397,
      "loss": 1.6791,
      "mean_token_accuracy": 0.5861249402165413,
      "step": 700
    },
    {
      "epoch": 0.28917625496385296,
      "grad_norm": 0.45466601848602295,
      "learning_rate": 0.0001668084037989544,
      "loss": 1.5153,
      "mean_token_accuracy": 0.5999807387590408,
      "step": 710
    },
    {
      "epoch": 0.29324915996334383,
      "grad_norm": 0.34910282492637634,
      "learning_rate": 0.00016582083845377552,
      "loss": 1.6821,
      "mean_token_accuracy": 0.5889992110431195,
      "step": 720
    },
    {
      "epoch": 0.29732206496283475,
      "grad_norm": 0.4916020929813385,
      "learning_rate": 0.00016482181433696643,
      "loss": 1.8462,
      "mean_token_accuracy": 0.5748938458040357,
      "step": 730
    },
    {
      "epoch": 0.3013949699623256,
      "grad_norm": 0.2545833885669708,
      "learning_rate": 0.00016381150536896736,
      "loss": 1.5756,
      "mean_token_accuracy": 0.6056667067110538,
      "step": 740
    },
    {
      "epoch": 0.3054678749618165,
      "grad_norm": 0.30347147583961487,
      "learning_rate": 0.0001627900874348022,
      "loss": 1.6003,
      "mean_token_accuracy": 0.5873342089354991,
      "step": 750
    },
    {
      "epoch": 0.3095407799613074,
      "grad_norm": 0.37371426820755005,
      "learning_rate": 0.0001617577383534584,
      "loss": 1.6576,
      "mean_token_accuracy": 0.5790071420371532,
      "step": 760
    },
    {
      "epoch": 0.3136136849607983,
      "grad_norm": 0.41969504952430725,
      "learning_rate": 0.00016071463784693045,
      "loss": 1.6181,
      "mean_token_accuracy": 0.5854876518249512,
      "step": 770
    },
    {
      "epoch": 0.3176865899602892,
      "grad_norm": 0.17495319247245789,
      "learning_rate": 0.00015966096750893197,
      "loss": 1.5142,
      "mean_token_accuracy": 0.6079291738569736,
      "step": 780
    },
    {
      "epoch": 0.32175949495978007,
      "grad_norm": 0.30013784766197205,
      "learning_rate": 0.00015859691077328215,
      "loss": 1.6583,
      "mean_token_accuracy": 0.581703095138073,
      "step": 790
    },
    {
      "epoch": 0.32583239995927094,
      "grad_norm": 0.3358050584793091,
      "learning_rate": 0.00015752265288197155,
      "loss": 1.6468,
      "mean_token_accuracy": 0.6049091577529907,
      "step": 800
    },
    {
      "epoch": 0.32990530495876186,
      "grad_norm": 0.3690403699874878,
      "learning_rate": 0.00015643838085291323,
      "loss": 1.8431,
      "mean_token_accuracy": 0.5602408707141876,
      "step": 810
    },
    {
      "epoch": 0.3339782099582527,
      "grad_norm": 0.34296655654907227,
      "learning_rate": 0.00015534428344738505,
      "loss": 1.7042,
      "mean_token_accuracy": 0.5799131192266941,
      "step": 820
    },
    {
      "epoch": 0.3380511149577436,
      "grad_norm": 0.2764555513858795,
      "learning_rate": 0.00015424055113716763,
      "loss": 1.5479,
      "mean_token_accuracy": 0.5909703068435193,
      "step": 830
    },
    {
      "epoch": 0.3421240199572345,
      "grad_norm": 0.26227012276649475,
      "learning_rate": 0.0001531273760713855,
      "loss": 1.5303,
      "mean_token_accuracy": 0.5910052061080933,
      "step": 840
    },
    {
      "epoch": 0.3461969249567254,
      "grad_norm": 0.3656936585903168,
      "learning_rate": 0.00015200495204305574,
      "loss": 1.5586,
      "mean_token_accuracy": 0.5943005800247192,
      "step": 850
    },
    {
      "epoch": 0.35026982995621625,
      "grad_norm": 0.29997819662094116,
      "learning_rate": 0.00015087347445535013,
      "loss": 1.8219,
      "mean_token_accuracy": 0.5533552631735802,
      "step": 860
    },
    {
      "epoch": 0.3543427349557072,
      "grad_norm": 0.290685772895813,
      "learning_rate": 0.00014973314028757787,
      "loss": 1.7261,
      "mean_token_accuracy": 0.5844682581722737,
      "step": 870
    },
    {
      "epoch": 0.35841563995519804,
      "grad_norm": 0.34553012251853943,
      "learning_rate": 0.00014858414806089295,
      "loss": 1.6982,
      "mean_token_accuracy": 0.5762835793197155,
      "step": 880
    },
    {
      "epoch": 0.3624885449546889,
      "grad_norm": 0.2141156941652298,
      "learning_rate": 0.0001474266978037338,
      "loss": 1.5318,
      "mean_token_accuracy": 0.6048024773597718,
      "step": 890
    },
    {
      "epoch": 0.36656144995417983,
      "grad_norm": 0.30456602573394775,
      "learning_rate": 0.00014626099101700018,
      "loss": 1.7901,
      "mean_token_accuracy": 0.5763920709490776,
      "step": 900
    },
    {
      "epoch": 0.3706343549536707,
      "grad_norm": 0.26921945810317993,
      "learning_rate": 0.00014508723063897376,
      "loss": 1.4936,
      "mean_token_accuracy": 0.6303243085741996,
      "step": 910
    },
    {
      "epoch": 0.3747072599531616,
      "grad_norm": 0.28455570340156555,
      "learning_rate": 0.00014390562100998868,
      "loss": 1.5804,
      "mean_token_accuracy": 0.6074232332408428,
      "step": 920
    },
    {
      "epoch": 0.3787801649526525,
      "grad_norm": 0.3388415575027466,
      "learning_rate": 0.00014271636783685777,
      "loss": 1.6731,
      "mean_token_accuracy": 0.5768752813339233,
      "step": 930
    },
    {
      "epoch": 0.38285306995214335,
      "grad_norm": 0.4311608076095581,
      "learning_rate": 0.00014151967815706091,
      "loss": 1.7237,
      "mean_token_accuracy": 0.5706497602164745,
      "step": 940
    },
    {
      "epoch": 0.3869259749516343,
      "grad_norm": 0.35940027236938477,
      "learning_rate": 0.00014031576030270202,
      "loss": 1.5355,
      "mean_token_accuracy": 0.5908183179795742,
      "step": 950
    },
    {
      "epoch": 0.39099887995112514,
      "grad_norm": 0.34071287512779236,
      "learning_rate": 0.00013910482386424023,
      "loss": 1.7247,
      "mean_token_accuracy": 0.5757749699056148,
      "step": 960
    },
    {
      "epoch": 0.395071784950616,
      "grad_norm": 0.413870245218277,
      "learning_rate": 0.00013788707965400236,
      "loss": 1.6796,
      "mean_token_accuracy": 0.592286454886198,
      "step": 970
    },
    {
      "epoch": 0.39914468995010693,
      "grad_norm": 0.2649496793746948,
      "learning_rate": 0.00013666273966948252,
      "loss": 1.5955,
      "mean_token_accuracy": 0.5936679825186729,
      "step": 980
    },
    {
      "epoch": 0.4032175949495978,
      "grad_norm": 0.3525199294090271,
      "learning_rate": 0.00013543201705643526,
      "loss": 1.647,
      "mean_token_accuracy": 0.5950982637703419,
      "step": 990
    },
    {
      "epoch": 0.40729049994908867,
      "grad_norm": 0.33436283469200134,
      "learning_rate": 0.00013419512607176914,
      "loss": 1.7161,
      "mean_token_accuracy": 0.574284989386797,
      "step": 1000
    },
    {
      "epoch": 0.4113634049485796,
      "grad_norm": 0.46867313981056213,
      "learning_rate": 0.00013295228204624648,
      "loss": 1.544,
      "mean_token_accuracy": 0.6102774910628795,
      "step": 1010
    },
    {
      "epoch": 0.41543630994807046,
      "grad_norm": 0.30373555421829224,
      "learning_rate": 0.00013170370134699653,
      "loss": 1.6287,
      "mean_token_accuracy": 0.5843084178864956,
      "step": 1020
    },
    {
      "epoch": 0.4195092149475613,
      "grad_norm": 0.2981624901294708,
      "learning_rate": 0.00013044960133984804,
      "loss": 1.6858,
      "mean_token_accuracy": 0.5856122255325318,
      "step": 1030
    },
    {
      "epoch": 0.42358211994705225,
      "grad_norm": 0.3545626997947693,
      "learning_rate": 0.00012919020035148776,
      "loss": 1.7392,
      "mean_token_accuracy": 0.5841099888086319,
      "step": 1040
    },
    {
      "epoch": 0.4276550249465431,
      "grad_norm": 0.2896677553653717,
      "learning_rate": 0.0001279257176314521,
      "loss": 1.5007,
      "mean_token_accuracy": 0.573243772238493,
      "step": 1050
    },
    {
      "epoch": 0.431727929946034,
      "grad_norm": 0.36384209990501404,
      "learning_rate": 0.00012665637331395785,
      "loss": 1.487,
      "mean_token_accuracy": 0.6025885075330735,
      "step": 1060
    },
    {
      "epoch": 0.4358008349455249,
      "grad_norm": 0.3681187033653259,
      "learning_rate": 0.00012538238837957882,
      "loss": 1.4913,
      "mean_token_accuracy": 0.5982382036745548,
      "step": 1070
    },
    {
      "epoch": 0.4398737399450158,
      "grad_norm": 0.2680988311767578,
      "learning_rate": 0.00012410398461677554,
      "loss": 1.6263,
      "mean_token_accuracy": 0.5956345148384571,
      "step": 1080
    },
    {
      "epoch": 0.4439466449445067,
      "grad_norm": 0.23174384236335754,
      "learning_rate": 0.00012282138458328358,
      "loss": 1.7378,
      "mean_token_accuracy": 0.590882021188736,
      "step": 1090
    },
    {
      "epoch": 0.44801954994399756,
      "grad_norm": 0.34088292717933655,
      "learning_rate": 0.00012153481156736892,
      "loss": 1.7385,
      "mean_token_accuracy": 0.5994494572281838,
      "step": 1100
    },
    {
      "epoch": 0.45209245494348843,
      "grad_norm": 0.24563632905483246,
      "learning_rate": 0.00012024448954895522,
      "loss": 1.5212,
      "mean_token_accuracy": 0.6165470741689205,
      "step": 1110
    },
    {
      "epoch": 0.45616535994297935,
      "grad_norm": 0.26980966329574585,
      "learning_rate": 0.00011895064316063127,
      "loss": 1.5254,
      "mean_token_accuracy": 0.5898841544985771,
      "step": 1120
    },
    {
      "epoch": 0.4602382649424702,
      "grad_norm": 0.32573202252388,
      "learning_rate": 0.00011765349764854461,
      "loss": 1.5704,
      "mean_token_accuracy": 0.6047514051198959,
      "step": 1130
    },
    {
      "epoch": 0.4643111699419611,
      "grad_norm": 0.3137454390525818,
      "learning_rate": 0.00011635327883318831,
      "loss": 1.5893,
      "mean_token_accuracy": 0.5792985640466213,
      "step": 1140
    },
    {
      "epoch": 0.468384074941452,
      "grad_norm": 0.368747353553772,
      "learning_rate": 0.00011505021307008785,
      "loss": 1.6388,
      "mean_token_accuracy": 0.5851111486554146,
      "step": 1150
    },
    {
      "epoch": 0.4724569799409429,
      "grad_norm": 0.325250506401062,
      "learning_rate": 0.00011374452721039477,
      "loss": 1.7192,
      "mean_token_accuracy": 0.5636343933641911,
      "step": 1160
    },
    {
      "epoch": 0.47652988494043375,
      "grad_norm": 0.32918378710746765,
      "learning_rate": 0.00011243644856139403,
      "loss": 1.6048,
      "mean_token_accuracy": 0.6072004094719887,
      "step": 1170
    },
    {
      "epoch": 0.48060278993992467,
      "grad_norm": 0.2892746031284332,
      "learning_rate": 0.00011112620484693223,
      "loss": 1.6785,
      "mean_token_accuracy": 0.5872686378657818,
      "step": 1180
    },
    {
      "epoch": 0.48467569493941554,
      "grad_norm": 0.2459000200033188,
      "learning_rate": 0.0001098140241677728,
      "loss": 1.5799,
      "mean_token_accuracy": 0.6077749952673912,
      "step": 1190
    },
    {
      "epoch": 0.4887485999389064,
      "grad_norm": 0.3696756660938263,
      "learning_rate": 0.00010850013496188606,
      "loss": 1.5966,
      "mean_token_accuracy": 0.5970290452241898,
      "step": 1200
    },
    {
      "epoch": 0.4928215049383973,
      "grad_norm": 0.27681517601013184,
      "learning_rate": 0.00010718476596468028,
      "loss": 1.7161,
      "mean_token_accuracy": 0.5730410292744637,
      "step": 1210
    },
    {
      "epoch": 0.4968944099378882,
      "grad_norm": 0.2720302641391754,
      "learning_rate": 0.00010586814616918113,
      "loss": 1.6991,
      "mean_token_accuracy": 0.5764113113284111,
      "step": 1220
    },
    {
      "epoch": 0.5009673149373791,
      "grad_norm": 0.34990179538726807,
      "learning_rate": 0.00010455050478616617,
      "loss": 1.7114,
      "mean_token_accuracy": 0.5776129819452762,
      "step": 1230
    },
    {
      "epoch": 0.50504021993687,
      "grad_norm": 0.33753877878189087,
      "learning_rate": 0.00010323207120426142,
      "loss": 1.8174,
      "mean_token_accuracy": 0.5551487416028976,
      "step": 1240
    },
    {
      "epoch": 0.5091131249363609,
      "grad_norm": 0.41568267345428467,
      "learning_rate": 0.00010191307495000712,
      "loss": 1.799,
      "mean_token_accuracy": 0.5767477229237556,
      "step": 1250
    },
    {
      "epoch": 0.5131860299358517,
      "grad_norm": 0.2747114300727844,
      "learning_rate": 0.00010059374564789932,
      "loss": 1.4763,
      "mean_token_accuracy": 0.6238099962472916,
      "step": 1260
    },
    {
      "epoch": 0.5172589349353426,
      "grad_norm": 0.2458280771970749,
      "learning_rate": 9.927431298041441e-05,
      "loss": 1.5262,
      "mean_token_accuracy": 0.6056429393589496,
      "step": 1270
    },
    {
      "epoch": 0.5213318399348336,
      "grad_norm": 0.2757134437561035,
      "learning_rate": 9.795500664802385e-05,
      "loss": 1.621,
      "mean_token_accuracy": 0.5842474676668644,
      "step": 1280
    },
    {
      "epoch": 0.5254047449343244,
      "grad_norm": 0.21551673114299774,
      "learning_rate": 9.663605632920518e-05,
      "loss": 1.659,
      "mean_token_accuracy": 0.5935076788067818,
      "step": 1290
    },
    {
      "epoch": 0.5294776499338153,
      "grad_norm": 0.5034237504005432,
      "learning_rate": 9.53176916404576e-05,
      "loss": 1.7666,
      "mean_token_accuracy": 0.5699214018881321,
      "step": 1300
    },
    {
      "epoch": 0.5335505549333062,
      "grad_norm": 0.26525890827178955,
      "learning_rate": 9.400014209632763e-05,
      "loss": 1.6026,
      "mean_token_accuracy": 0.5935329027473927,
      "step": 1310
    },
    {
      "epoch": 0.537623459932797,
      "grad_norm": 0.28077974915504456,
      "learning_rate": 9.268363706945312e-05,
      "loss": 1.7769,
      "mean_token_accuracy": 0.5664741955697536,
      "step": 1320
    },
    {
      "epoch": 0.5416963649322879,
      "grad_norm": 0.514976978302002,
      "learning_rate": 9.136840575063147e-05,
      "loss": 1.5157,
      "mean_token_accuracy": 0.6034789860248566,
      "step": 1330
    },
    {
      "epoch": 0.5457692699317789,
      "grad_norm": 0.318249374628067,
      "learning_rate": 9.005467710891987e-05,
      "loss": 1.8756,
      "mean_token_accuracy": 0.5630597174167633,
      "step": 1340
    },
    {
      "epoch": 0.5498421749312697,
      "grad_norm": 0.24940232932567596,
      "learning_rate": 8.874267985177394e-05,
      "loss": 1.5708,
      "mean_token_accuracy": 0.5888857699930667,
      "step": 1350
    },
    {
      "epoch": 0.5539150799307606,
      "grad_norm": 0.26299914717674255,
      "learning_rate": 8.743264238523199e-05,
      "loss": 1.6876,
      "mean_token_accuracy": 0.5782084472477436,
      "step": 1360
    },
    {
      "epoch": 0.5579879849302515,
      "grad_norm": 0.2588869333267212,
      "learning_rate": 8.612479277415174e-05,
      "loss": 1.6694,
      "mean_token_accuracy": 0.585976778715849,
      "step": 1370
    },
    {
      "epoch": 0.5620608899297423,
      "grad_norm": 0.2464841604232788,
      "learning_rate": 8.481935870250637e-05,
      "loss": 1.5838,
      "mean_token_accuracy": 0.605075704306364,
      "step": 1380
    },
    {
      "epoch": 0.5661337949292333,
      "grad_norm": 0.3231446146965027,
      "learning_rate": 8.351656743374709e-05,
      "loss": 1.6321,
      "mean_token_accuracy": 0.5716924026608468,
      "step": 1390
    },
    {
      "epoch": 0.5702066999287242,
      "grad_norm": 0.23010632395744324,
      "learning_rate": 8.22166457712386e-05,
      "loss": 1.5016,
      "mean_token_accuracy": 0.6048496462404728,
      "step": 1400
    },
    {
      "epoch": 0.5742796049282151,
      "grad_norm": 0.3723667860031128,
      "learning_rate": 8.091982001877493e-05,
      "loss": 1.5412,
      "mean_token_accuracy": 0.6111127749085427,
      "step": 1410
    },
    {
      "epoch": 0.5783525099277059,
      "grad_norm": 0.24990710616111755,
      "learning_rate": 7.962631594118208e-05,
      "loss": 1.7629,
      "mean_token_accuracy": 0.5585654892027379,
      "step": 1420
    },
    {
      "epoch": 0.5824254149271968,
      "grad_norm": 0.3681967556476593,
      "learning_rate": 7.833635872501462e-05,
      "loss": 1.6342,
      "mean_token_accuracy": 0.5907308183610439,
      "step": 1430
    },
    {
      "epoch": 0.5864983199266877,
      "grad_norm": 0.3382493555545807,
      "learning_rate": 7.705017293935281e-05,
      "loss": 1.5803,
      "mean_token_accuracy": 0.6061145611107349,
      "step": 1440
    },
    {
      "epoch": 0.5905712249261786,
      "grad_norm": 0.28145501017570496,
      "learning_rate": 7.576798249670725e-05,
      "loss": 1.8459,
      "mean_token_accuracy": 0.5457224696874619,
      "step": 1450
    },
    {
      "epoch": 0.5946441299256695,
      "grad_norm": 0.3189752697944641,
      "learning_rate": 7.449001061403809e-05,
      "loss": 1.5263,
      "mean_token_accuracy": 0.5937092356383801,
      "step": 1460
    },
    {
      "epoch": 0.5987170349251604,
      "grad_norm": 0.2588890492916107,
      "learning_rate": 7.321647977389479e-05,
      "loss": 1.5965,
      "mean_token_accuracy": 0.5941358201205731,
      "step": 1470
    },
    {
      "epoch": 0.6027899399246512,
      "grad_norm": 0.2777283191680908,
      "learning_rate": 7.194761168568445e-05,
      "loss": 1.5667,
      "mean_token_accuracy": 0.6003799811005592,
      "step": 1480
    },
    {
      "epoch": 0.6068628449241421,
      "grad_norm": 0.23376941680908203,
      "learning_rate": 7.068362724707392e-05,
      "loss": 1.4813,
      "mean_token_accuracy": 0.6078310683369637,
      "step": 1490
    },
    {
      "epoch": 0.610935749923633,
      "grad_norm": 0.2295948565006256,
      "learning_rate": 6.942474650553408e-05,
      "loss": 1.6786,
      "mean_token_accuracy": 0.5886344678699971,
      "step": 1500
    },
    {
      "epoch": 0.615008654923124,
      "grad_norm": 0.3243666887283325,
      "learning_rate": 6.817118862003132e-05,
      "loss": 1.6343,
      "mean_token_accuracy": 0.5855603873729706,
      "step": 1510
    },
    {
      "epoch": 0.6190815599226148,
      "grad_norm": 0.7187057733535767,
      "learning_rate": 6.692317182287432e-05,
      "loss": 1.8144,
      "mean_token_accuracy": 0.5671629451215268,
      "step": 1520
    },
    {
      "epoch": 0.6231544649221057,
      "grad_norm": 0.35659492015838623,
      "learning_rate": 6.568091338172195e-05,
      "loss": 1.6117,
      "mean_token_accuracy": 0.601442601531744,
      "step": 1530
    },
    {
      "epoch": 0.6272273699215966,
      "grad_norm": 0.3395217955112457,
      "learning_rate": 6.444462956175876e-05,
      "loss": 1.6222,
      "mean_token_accuracy": 0.5970501154661179,
      "step": 1540
    },
    {
      "epoch": 0.6313002749210874,
      "grad_norm": 0.26399192214012146,
      "learning_rate": 6.321453558804571e-05,
      "loss": 1.6048,
      "mean_token_accuracy": 0.5844796732068062,
      "step": 1550
    },
    {
      "epoch": 0.6353731799205784,
      "grad_norm": 0.2993052899837494,
      "learning_rate": 6.199084560805121e-05,
      "loss": 1.7073,
      "mean_token_accuracy": 0.5789771333336831,
      "step": 1560
    },
    {
      "epoch": 0.6394460849200693,
      "grad_norm": 0.2676873505115509,
      "learning_rate": 6.077377265437043e-05,
      "loss": 1.8152,
      "mean_token_accuracy": 0.5734024614095687,
      "step": 1570
    },
    {
      "epoch": 0.6435189899195601,
      "grad_norm": 0.293557733297348,
      "learning_rate": 5.956352860763809e-05,
      "loss": 1.7108,
      "mean_token_accuracy": 0.5808110930025577,
      "step": 1580
    },
    {
      "epoch": 0.647591894919051,
      "grad_norm": 0.23729322850704193,
      "learning_rate": 5.83603241596423e-05,
      "loss": 1.4793,
      "mean_token_accuracy": 0.6202867470681668,
      "step": 1590
    },
    {
      "epoch": 0.6516647999185419,
      "grad_norm": 0.30609002709388733,
      "learning_rate": 5.716436877664517e-05,
      "loss": 1.752,
      "mean_token_accuracy": 0.5730870619416237,
      "step": 1600
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 0.30717799067497253,
      "learning_rate": 5.5975870662916484e-05,
      "loss": 1.7172,
      "mean_token_accuracy": 0.5701417997479439,
      "step": 1610
    },
    {
      "epoch": 0.6598106099175237,
      "grad_norm": 0.44037064909935,
      "learning_rate": 5.4795036724487735e-05,
      "loss": 1.5377,
      "mean_token_accuracy": 0.6102925211191177,
      "step": 1620
    },
    {
      "epoch": 0.6638835149170146,
      "grad_norm": 0.24488377571105957,
      "learning_rate": 5.362207253313136e-05,
      "loss": 1.4547,
      "mean_token_accuracy": 0.6181615687906742,
      "step": 1630
    },
    {
      "epoch": 0.6679564199165055,
      "grad_norm": 0.2750435769557953,
      "learning_rate": 5.245718229057326e-05,
      "loss": 1.6086,
      "mean_token_accuracy": 0.5703060247004033,
      "step": 1640
    },
    {
      "epoch": 0.6720293249159963,
      "grad_norm": 0.2821342647075653,
      "learning_rate": 5.1300568792942535e-05,
      "loss": 1.6018,
      "mean_token_accuracy": 0.5989562854170799,
      "step": 1650
    },
    {
      "epoch": 0.6761022299154872,
      "grad_norm": 0.22521165013313293,
      "learning_rate": 5.015243339546731e-05,
      "loss": 1.7574,
      "mean_token_accuracy": 0.5801547184586525,
      "step": 1660
    },
    {
      "epoch": 0.6801751349149782,
      "grad_norm": 0.29259297251701355,
      "learning_rate": 4.90129759774202e-05,
      "loss": 1.7425,
      "mean_token_accuracy": 0.5723637498915195,
      "step": 1670
    },
    {
      "epoch": 0.684248039914469,
      "grad_norm": 0.2705146074295044,
      "learning_rate": 4.7882394907321674e-05,
      "loss": 1.6121,
      "mean_token_accuracy": 0.6098110035061837,
      "step": 1680
    },
    {
      "epoch": 0.6883209449139599,
      "grad_norm": 0.2677505910396576,
      "learning_rate": 4.676088700840575e-05,
      "loss": 1.6416,
      "mean_token_accuracy": 0.5757282719016075,
      "step": 1690
    },
    {
      "epoch": 0.6923938499134508,
      "grad_norm": 0.2644527554512024,
      "learning_rate": 4.564864752435509e-05,
      "loss": 1.6675,
      "mean_token_accuracy": 0.6154301188886165,
      "step": 1700
    },
    {
      "epoch": 0.6964667549129416,
      "grad_norm": 0.23048701882362366,
      "learning_rate": 4.454587008531097e-05,
      "loss": 1.6641,
      "mean_token_accuracy": 0.5855869121849537,
      "step": 1710
    },
    {
      "epoch": 0.7005396599124325,
      "grad_norm": 0.2789078652858734,
      "learning_rate": 4.345274667416399e-05,
      "loss": 1.6978,
      "mean_token_accuracy": 0.5762215368449688,
      "step": 1720
    },
    {
      "epoch": 0.7046125649119235,
      "grad_norm": 0.271881103515625,
      "learning_rate": 4.2369467593131926e-05,
      "loss": 1.681,
      "mean_token_accuracy": 0.5667479492723941,
      "step": 1730
    },
    {
      "epoch": 0.7086854699114143,
      "grad_norm": 0.24953240156173706,
      "learning_rate": 4.129622143062985e-05,
      "loss": 1.5405,
      "mean_token_accuracy": 0.6005463972687721,
      "step": 1740
    },
    {
      "epoch": 0.7127583749109052,
      "grad_norm": 0.3925758898258209,
      "learning_rate": 4.02331950284387e-05,
      "loss": 1.7217,
      "mean_token_accuracy": 0.5689709268510341,
      "step": 1750
    },
    {
      "epoch": 0.7168312799103961,
      "grad_norm": 0.2544846832752228,
      "learning_rate": 3.918057344917795e-05,
      "loss": 1.5948,
      "mean_token_accuracy": 0.5933421194553375,
      "step": 1760
    },
    {
      "epoch": 0.720904184909887,
      "grad_norm": 0.32760509848594666,
      "learning_rate": 3.813853994408793e-05,
      "loss": 1.6678,
      "mean_token_accuracy": 0.5856216661632061,
      "step": 1770
    },
    {
      "epoch": 0.7249770899093778,
      "grad_norm": 0.2847062647342682,
      "learning_rate": 3.7107275921127704e-05,
      "loss": 1.682,
      "mean_token_accuracy": 0.5889982558786869,
      "step": 1780
    },
    {
      "epoch": 0.7290499949088688,
      "grad_norm": 0.22774401307106018,
      "learning_rate": 3.60869609133936e-05,
      "loss": 1.7135,
      "mean_token_accuracy": 0.5773006275296211,
      "step": 1790
    },
    {
      "epoch": 0.7331228999083597,
      "grad_norm": 0.2606080174446106,
      "learning_rate": 3.507777254786425e-05,
      "loss": 1.4999,
      "mean_token_accuracy": 0.6269011601805687,
      "step": 1800
    },
    {
      "epoch": 0.7371958049078505,
      "grad_norm": 0.2962757647037506,
      "learning_rate": 3.407988651447738e-05,
      "loss": 1.6202,
      "mean_token_accuracy": 0.5973276488482953,
      "step": 1810
    },
    {
      "epoch": 0.7412687099073414,
      "grad_norm": 0.29107147455215454,
      "learning_rate": 3.3093476535544074e-05,
      "loss": 1.5502,
      "mean_token_accuracy": 0.6133273020386696,
      "step": 1820
    },
    {
      "epoch": 0.7453416149068323,
      "grad_norm": 0.20980948209762573,
      "learning_rate": 3.211871433550513e-05,
      "loss": 1.6333,
      "mean_token_accuracy": 0.6155988665297627,
      "step": 1830
    },
    {
      "epoch": 0.7494145199063232,
      "grad_norm": 0.24882718920707703,
      "learning_rate": 3.1155769611035825e-05,
      "loss": 1.4907,
      "mean_token_accuracy": 0.6201219961047173,
      "step": 1840
    },
    {
      "epoch": 0.7534874249058141,
      "grad_norm": 0.23715901374816895,
      "learning_rate": 3.0204810001503124e-05,
      "loss": 1.8018,
      "mean_token_accuracy": 0.5756942637264728,
      "step": 1850
    },
    {
      "epoch": 0.757560329905305,
      "grad_norm": 0.35216882824897766,
      "learning_rate": 2.9266001059781258e-05,
      "loss": 1.7305,
      "mean_token_accuracy": 0.5722471877932549,
      "step": 1860
    },
    {
      "epoch": 0.7616332349047958,
      "grad_norm": 0.2924104332923889,
      "learning_rate": 2.83395062234308e-05,
      "loss": 1.6642,
      "mean_token_accuracy": 0.58627370595932,
      "step": 1870
    },
    {
      "epoch": 0.7657061399042867,
      "grad_norm": 0.27772393822669983,
      "learning_rate": 2.742548678624548e-05,
      "loss": 1.8349,
      "mean_token_accuracy": 0.5614061944186688,
      "step": 1880
    },
    {
      "epoch": 0.7697790449037776,
      "grad_norm": 0.31574469804763794,
      "learning_rate": 2.6524101870172846e-05,
      "loss": 1.7883,
      "mean_token_accuracy": 0.561104378849268,
      "step": 1890
    },
    {
      "epoch": 0.7738519499032686,
      "grad_norm": 0.253779798746109,
      "learning_rate": 2.5635508397612262e-05,
      "loss": 1.6654,
      "mean_token_accuracy": 0.5888113439083099,
      "step": 1900
    },
    {
      "epoch": 0.7779248549027594,
      "grad_norm": 0.2504970133304596,
      "learning_rate": 2.4759861064096603e-05,
      "loss": 1.6478,
      "mean_token_accuracy": 0.5726306334137916,
      "step": 1910
    },
    {
      "epoch": 0.7819977599022503,
      "grad_norm": 0.23571030795574188,
      "learning_rate": 2.3897312311360955e-05,
      "loss": 1.5355,
      "mean_token_accuracy": 0.6026113323867321,
      "step": 1920
    },
    {
      "epoch": 0.7860706649017412,
      "grad_norm": 0.2395690232515335,
      "learning_rate": 2.3048012300804222e-05,
      "loss": 1.5565,
      "mean_token_accuracy": 0.5976604223251343,
      "step": 1930
    },
    {
      "epoch": 0.790143569901232,
      "grad_norm": 0.5269713997840881,
      "learning_rate": 2.221210888734736e-05,
      "loss": 1.636,
      "mean_token_accuracy": 0.5818449839949608,
      "step": 1940
    },
    {
      "epoch": 0.7942164749007229,
      "grad_norm": 0.4233987033367157,
      "learning_rate": 2.13897475936933e-05,
      "loss": 1.7844,
      "mean_token_accuracy": 0.5720866233110428,
      "step": 1950
    },
    {
      "epoch": 0.7982893799002139,
      "grad_norm": 0.2641923427581787,
      "learning_rate": 2.0581071584992818e-05,
      "loss": 1.5874,
      "mean_token_accuracy": 0.5966846913099288,
      "step": 1960
    },
    {
      "epoch": 0.8023622848997047,
      "grad_norm": 0.27280351519584656,
      "learning_rate": 1.9786221643920844e-05,
      "loss": 1.6279,
      "mean_token_accuracy": 0.5751761384308338,
      "step": 1970
    },
    {
      "epoch": 0.8064351898991956,
      "grad_norm": 0.3823714256286621,
      "learning_rate": 1.9005336146167686e-05,
      "loss": 1.6269,
      "mean_token_accuracy": 0.5963201723992825,
      "step": 1980
    },
    {
      "epoch": 0.8105080948986865,
      "grad_norm": 0.25173816084861755,
      "learning_rate": 1.8238551036349028e-05,
      "loss": 1.5308,
      "mean_token_accuracy": 0.6112879984080791,
      "step": 1990
    },
    {
      "epoch": 0.8145809998981773,
      "grad_norm": 0.21256780624389648,
      "learning_rate": 1.7485999804339348e-05,
      "loss": 1.5568,
      "mean_token_accuracy": 0.5963364981114865,
      "step": 2000
    },
    {
      "epoch": 0.8186539048976683,
      "grad_norm": 0.2510949969291687,
      "learning_rate": 1.6747813462032615e-05,
      "loss": 1.6787,
      "mean_token_accuracy": 0.58960345312953,
      "step": 2010
    },
    {
      "epoch": 0.8227268098971592,
      "grad_norm": 0.255790650844574,
      "learning_rate": 1.6024120520534326e-05,
      "loss": 1.6416,
      "mean_token_accuracy": 0.5875880800187587,
      "step": 2020
    },
    {
      "epoch": 0.82679971489665,
      "grad_norm": 0.307492196559906,
      "learning_rate": 1.5315046967789082e-05,
      "loss": 1.69,
      "mean_token_accuracy": 0.5625761769711971,
      "step": 2030
    },
    {
      "epoch": 0.8308726198961409,
      "grad_norm": 0.2648999094963074,
      "learning_rate": 1.4620716246647203e-05,
      "loss": 1.6092,
      "mean_token_accuracy": 0.6106476083397865,
      "step": 2040
    },
    {
      "epoch": 0.8349455248956318,
      "grad_norm": 0.2488166093826294,
      "learning_rate": 1.394124923337462e-05,
      "loss": 1.6848,
      "mean_token_accuracy": 0.5697021905332804,
      "step": 2050
    },
    {
      "epoch": 0.8390184298951227,
      "grad_norm": 0.2427694946527481,
      "learning_rate": 1.3276764216609294e-05,
      "loss": 1.5843,
      "mean_token_accuracy": 0.6084981314837933,
      "step": 2060
    },
    {
      "epoch": 0.8430913348946136,
      "grad_norm": 0.2833966910839081,
      "learning_rate": 1.2627376876768593e-05,
      "loss": 1.5443,
      "mean_token_accuracy": 0.6015144042670727,
      "step": 2070
    },
    {
      "epoch": 0.8471642398941045,
      "grad_norm": 0.4057978689670563,
      "learning_rate": 1.1993200265910131e-05,
      "loss": 1.6073,
      "mean_token_accuracy": 0.5917512811720371,
      "step": 2080
    },
    {
      "epoch": 0.8512371448935954,
      "grad_norm": 0.25613030791282654,
      "learning_rate": 1.1374344788050829e-05,
      "loss": 1.8038,
      "mean_token_accuracy": 0.5568435616791249,
      "step": 2090
    },
    {
      "epoch": 0.8553100498930862,
      "grad_norm": 0.30181950330734253,
      "learning_rate": 1.0770918179946388e-05,
      "loss": 1.5022,
      "mean_token_accuracy": 0.6081097513437271,
      "step": 2100
    },
    {
      "epoch": 0.8593829548925771,
      "grad_norm": 0.23373402655124664,
      "learning_rate": 1.0183025492335408e-05,
      "loss": 1.7432,
      "mean_token_accuracy": 0.5653887689113617,
      "step": 2110
    },
    {
      "epoch": 0.863455859892068,
      "grad_norm": 0.2826649248600006,
      "learning_rate": 9.610769071651193e-06,
      "loss": 1.6706,
      "mean_token_accuracy": 0.5875243842601776,
      "step": 2120
    },
    {
      "epoch": 0.867528764891559,
      "grad_norm": 0.3047688603401184,
      "learning_rate": 9.05424854220408e-06,
      "loss": 1.5901,
      "mean_token_accuracy": 0.6013362683355808,
      "step": 2130
    },
    {
      "epoch": 0.8716016698910498,
      "grad_norm": 0.3211512863636017,
      "learning_rate": 8.513560788837916e-06,
      "loss": 1.6414,
      "mean_token_accuracy": 0.5845984369516373,
      "step": 2140
    },
    {
      "epoch": 0.8756745748905407,
      "grad_norm": 0.22475050389766693,
      "learning_rate": 7.988799940063297e-06,
      "loss": 1.6038,
      "mean_token_accuracy": 0.5835995152592659,
      "step": 2150
    },
    {
      "epoch": 0.8797474798900315,
      "grad_norm": 0.2239948809146881,
      "learning_rate": 7.480057351670688e-06,
      "loss": 1.6661,
      "mean_token_accuracy": 0.5898953646421432,
      "step": 2160
    },
    {
      "epoch": 0.8838203848895224,
      "grad_norm": 0.3669275641441345,
      "learning_rate": 6.987421590826282e-06,
      "loss": 1.6066,
      "mean_token_accuracy": 0.5877827815711498,
      "step": 2170
    },
    {
      "epoch": 0.8878932898890134,
      "grad_norm": 0.30003634095191956,
      "learning_rate": 6.510978420653335e-06,
      "loss": 1.6816,
      "mean_token_accuracy": 0.5926426865160466,
      "step": 2180
    },
    {
      "epoch": 0.8919661948885043,
      "grad_norm": 0.2707299590110779,
      "learning_rate": 6.050810785301597e-06,
      "loss": 1.7702,
      "mean_token_accuracy": 0.561020129173994,
      "step": 2190
    },
    {
      "epoch": 0.8960390998879951,
      "grad_norm": 0.3029952347278595,
      "learning_rate": 5.606998795507578e-06,
      "loss": 1.5417,
      "mean_token_accuracy": 0.598423033952713,
      "step": 2200
    },
    {
      "epoch": 0.900112004887486,
      "grad_norm": 0.27840766310691833,
      "learning_rate": 5.1796197146479985e-06,
      "loss": 1.5119,
      "mean_token_accuracy": 0.6152562454342843,
      "step": 2210
    },
    {
      "epoch": 0.9041849098869769,
      "grad_norm": 0.28235796093940735,
      "learning_rate": 4.768747945288987e-06,
      "loss": 1.5287,
      "mean_token_accuracy": 0.61318289488554,
      "step": 2220
    },
    {
      "epoch": 0.9082578148864677,
      "grad_norm": 0.21450947225093842,
      "learning_rate": 4.37445501623337e-06,
      "loss": 1.5842,
      "mean_token_accuracy": 0.6025399126112461,
      "step": 2230
    },
    {
      "epoch": 0.9123307198859587,
      "grad_norm": 0.29954469203948975,
      "learning_rate": 3.996809570068127e-06,
      "loss": 1.5514,
      "mean_token_accuracy": 0.6040661752223968,
      "step": 2240
    },
    {
      "epoch": 0.9164036248854496,
      "grad_norm": 0.34261876344680786,
      "learning_rate": 3.635877351214445e-06,
      "loss": 1.5493,
      "mean_token_accuracy": 0.5996488876640796,
      "step": 2250
    },
    {
      "epoch": 0.9204765298849404,
      "grad_norm": 0.24511079490184784,
      "learning_rate": 3.291721194482189e-06,
      "loss": 1.5494,
      "mean_token_accuracy": 0.6054005287587643,
      "step": 2260
    },
    {
      "epoch": 0.9245494348844313,
      "grad_norm": 0.21510252356529236,
      "learning_rate": 2.9644010141310017e-06,
      "loss": 1.6294,
      "mean_token_accuracy": 0.5961603626608849,
      "step": 2270
    },
    {
      "epoch": 0.9286223398839222,
      "grad_norm": 0.23636655509471893,
      "learning_rate": 2.65397379343979e-06,
      "loss": 1.7332,
      "mean_token_accuracy": 0.5859133303165436,
      "step": 2280
    },
    {
      "epoch": 0.932695244883413,
      "grad_norm": 0.25582408905029297,
      "learning_rate": 2.3604935747865377e-06,
      "loss": 1.6691,
      "mean_token_accuracy": 0.5889919593930244,
      "step": 2290
    },
    {
      "epoch": 0.936768149882904,
      "grad_norm": 0.3853449523448944,
      "learning_rate": 2.0840114502400086e-06,
      "loss": 1.5358,
      "mean_token_accuracy": 0.5844359740614891,
      "step": 2300
    },
    {
      "epoch": 0.9408410548823949,
      "grad_norm": 0.2177136093378067,
      "learning_rate": 1.8245755526650753e-06,
      "loss": 1.6318,
      "mean_token_accuracy": 0.5915890723466873,
      "step": 2310
    },
    {
      "epoch": 0.9449139598818858,
      "grad_norm": 0.23138591647148132,
      "learning_rate": 1.5822310473433411e-06,
      "loss": 1.5595,
      "mean_token_accuracy": 0.5974130786955356,
      "step": 2320
    },
    {
      "epoch": 0.9489868648813766,
      "grad_norm": 0.2235519289970398,
      "learning_rate": 1.357020124110231e-06,
      "loss": 1.7522,
      "mean_token_accuracy": 0.5713608346879482,
      "step": 2330
    },
    {
      "epoch": 0.9530597698808675,
      "grad_norm": 0.37900933623313904,
      "learning_rate": 1.1489819900101784e-06,
      "loss": 1.5307,
      "mean_token_accuracy": 0.6045880667865277,
      "step": 2340
    },
    {
      "epoch": 0.9571326748803585,
      "grad_norm": 0.2911360561847687,
      "learning_rate": 9.581528624710734e-07,
      "loss": 1.5633,
      "mean_token_accuracy": 0.5826431967318058,
      "step": 2350
    },
    {
      "epoch": 0.9612055798798493,
      "grad_norm": 0.25369352102279663,
      "learning_rate": 7.845659629990842e-07,
      "loss": 1.6927,
      "mean_token_accuracy": 0.5901580177247524,
      "step": 2360
    },
    {
      "epoch": 0.9652784848793402,
      "grad_norm": 0.32107028365135193,
      "learning_rate": 6.282515113952281e-07,
      "loss": 1.815,
      "mean_token_accuracy": 0.56534923017025,
      "step": 2370
    },
    {
      "epoch": 0.9693513898788311,
      "grad_norm": 0.3105465769767761,
      "learning_rate": 4.892367204943016e-07,
      "loss": 1.5694,
      "mean_token_accuracy": 0.5809950686991214,
      "step": 2380
    },
    {
      "epoch": 0.9734242948783219,
      "grad_norm": 0.2689298689365387,
      "learning_rate": 3.6754579142741495e-07,
      "loss": 1.6555,
      "mean_token_accuracy": 0.591179046779871,
      "step": 2390
    },
    {
      "epoch": 0.9774971998778128,
      "grad_norm": 0.44850870966911316,
      "learning_rate": 2.6319990940885107e-07,
      "loss": 1.7315,
      "mean_token_accuracy": 0.5772897489368916,
      "step": 2400
    },
    {
      "epoch": 0.9815701048773038,
      "grad_norm": 0.25496381521224976,
      "learning_rate": 1.762172400478601e-07,
      "loss": 1.5847,
      "mean_token_accuracy": 0.5798953503370285,
      "step": 2410
    },
    {
      "epoch": 0.9856430098767947,
      "grad_norm": 0.2383822500705719,
      "learning_rate": 1.0661292618624474e-07,
      "loss": 1.54,
      "mean_token_accuracy": 0.6138455606997013,
      "step": 2420
    },
    {
      "epoch": 0.9897159148762855,
      "grad_norm": 0.2854715585708618,
      "learning_rate": 5.439908526212456e-08,
      "loss": 1.4109,
      "mean_token_accuracy": 0.6151122771203518,
      "step": 2430
    },
    {
      "epoch": 0.9937888198757764,
      "grad_norm": 0.297370046377182,
      "learning_rate": 1.9584807200423438e-08,
      "loss": 1.5128,
      "mean_token_accuracy": 0.6013165354728699,
      "step": 2440
    },
    {
      "epoch": 0.9978617248752673,
      "grad_norm": 0.2563394010066986,
      "learning_rate": 2.176152830357658e-09,
      "loss": 1.6287,
      "mean_token_accuracy": 0.5945099242031574,
      "step": 2450
    }
  ],
  "logging_steps": 10,
  "max_steps": 2455,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.283473658609664e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}