| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997473684210526, |
| "eval_steps": 500, |
| "global_step": 1484, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006736842105263158, |
| "grad_norm": 3.6967623233795166, |
| "learning_rate": 1.3422818791946309e-06, |
| "loss": 2.4093, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006736842105263158, |
| "grad_norm": 2.5490193367004395, |
| "learning_rate": 1.3422818791946309e-05, |
| "loss": 2.4939, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013473684210526317, |
| "grad_norm": 0.18483224511146545, |
| "learning_rate": 2.6845637583892618e-05, |
| "loss": 1.1877, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020210526315789474, |
| "grad_norm": 0.2031693309545517, |
| "learning_rate": 4.026845637583892e-05, |
| "loss": 0.8909, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.026947368421052633, |
| "grad_norm": 0.6876732707023621, |
| "learning_rate": 5.3691275167785237e-05, |
| "loss": 0.7581, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03368421052631579, |
| "grad_norm": 0.09247241914272308, |
| "learning_rate": 6.711409395973155e-05, |
| "loss": 0.7594, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04042105263157895, |
| "grad_norm": 0.1324968934059143, |
| "learning_rate": 8.053691275167784e-05, |
| "loss": 0.7405, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04715789473684211, |
| "grad_norm": 0.05673883110284805, |
| "learning_rate": 9.395973154362417e-05, |
| "loss": 0.7065, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.053894736842105266, |
| "grad_norm": 0.04617280140519142, |
| "learning_rate": 0.00010738255033557047, |
| "loss": 0.6817, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06063157894736842, |
| "grad_norm": 0.04381496459245682, |
| "learning_rate": 0.0001208053691275168, |
| "loss": 0.6789, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06736842105263158, |
| "grad_norm": 0.07428538799285889, |
| "learning_rate": 0.0001342281879194631, |
| "loss": 0.6816, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07410526315789474, |
| "grad_norm": 0.04249708354473114, |
| "learning_rate": 0.00014765100671140942, |
| "loss": 0.6997, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0808421052631579, |
| "grad_norm": 0.05957937240600586, |
| "learning_rate": 0.0001610738255033557, |
| "loss": 0.6807, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08757894736842105, |
| "grad_norm": 0.03975442424416542, |
| "learning_rate": 0.000174496644295302, |
| "loss": 0.6733, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09431578947368421, |
| "grad_norm": 0.04079463332891464, |
| "learning_rate": 0.00018791946308724833, |
| "loss": 0.6556, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10105263157894737, |
| "grad_norm": 0.04245497286319733, |
| "learning_rate": 0.00019985018726591762, |
| "loss": 0.6575, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10778947368421053, |
| "grad_norm": 0.09695123136043549, |
| "learning_rate": 0.00019835205992509364, |
| "loss": 0.6916, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11452631578947368, |
| "grad_norm": 0.03505201265215874, |
| "learning_rate": 0.00019685393258426966, |
| "loss": 0.6622, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12126315789473684, |
| "grad_norm": 0.02820334956049919, |
| "learning_rate": 0.0001953558052434457, |
| "loss": 0.6497, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.04135354235768318, |
| "learning_rate": 0.00019385767790262173, |
| "loss": 0.6671, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.13473684210526315, |
| "grad_norm": 0.031461067497730255, |
| "learning_rate": 0.00019235955056179775, |
| "loss": 0.657, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1414736842105263, |
| "grad_norm": 0.04208710789680481, |
| "learning_rate": 0.0001908614232209738, |
| "loss": 0.6766, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1482105263157895, |
| "grad_norm": 3.495147705078125, |
| "learning_rate": 0.00018936329588014982, |
| "loss": 3.9378, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.15494736842105264, |
| "grad_norm": 0.18893112242221832, |
| "learning_rate": 0.00018786516853932586, |
| "loss": 7.1374, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1616842105263158, |
| "grad_norm": 0.0959916040301323, |
| "learning_rate": 0.00018636704119850189, |
| "loss": 5.8104, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16842105263157894, |
| "grad_norm": 0.08286964148283005, |
| "learning_rate": 0.0001848689138576779, |
| "loss": 4.7292, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1751578947368421, |
| "grad_norm": 0.04510454833507538, |
| "learning_rate": 0.00018337078651685393, |
| "loss": 4.9858, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.18189473684210528, |
| "grad_norm": 0.2256896197795868, |
| "learning_rate": 0.00018187265917602997, |
| "loss": 4.7463, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.18863157894736843, |
| "grad_norm": 0.06342379748821259, |
| "learning_rate": 0.00018037453183520602, |
| "loss": 4.517, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.19536842105263158, |
| "grad_norm": 0.07497289776802063, |
| "learning_rate": 0.00017887640449438204, |
| "loss": 4.4052, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.20210526315789473, |
| "grad_norm": 0.08952877670526505, |
| "learning_rate": 0.00017737827715355806, |
| "loss": 3.9614, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20884210526315788, |
| "grad_norm": 0.044066932052373886, |
| "learning_rate": 0.00017588014981273408, |
| "loss": 4.5861, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.21557894736842106, |
| "grad_norm": 0.08251778781414032, |
| "learning_rate": 0.0001743820224719101, |
| "loss": 4.5163, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.22231578947368422, |
| "grad_norm": 0.04723803699016571, |
| "learning_rate": 0.00017288389513108615, |
| "loss": 4.1904, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.22905263157894737, |
| "grad_norm": 0.09082615375518799, |
| "learning_rate": 0.0001713857677902622, |
| "loss": 4.1982, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.23578947368421052, |
| "grad_norm": 0.04866361245512962, |
| "learning_rate": 0.00016988764044943822, |
| "loss": 3.8506, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.24252631578947367, |
| "grad_norm": 0.04515402019023895, |
| "learning_rate": 0.00016838951310861424, |
| "loss": 4.4254, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.24926315789473685, |
| "grad_norm": 0.14205284416675568, |
| "learning_rate": 0.00016689138576779026, |
| "loss": 4.4111, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.16082021594047546, |
| "learning_rate": 0.0001653932584269663, |
| "loss": 4.1119, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.26273684210526316, |
| "grad_norm": 0.061411116272211075, |
| "learning_rate": 0.00016389513108614235, |
| "loss": 4.059, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2694736842105263, |
| "grad_norm": 0.058379318565130234, |
| "learning_rate": 0.00016239700374531837, |
| "loss": 3.7307, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.27621052631578946, |
| "grad_norm": 0.048859789967536926, |
| "learning_rate": 0.0001608988764044944, |
| "loss": 4.3039, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2829473684210526, |
| "grad_norm": 0.06003361940383911, |
| "learning_rate": 0.0001594007490636704, |
| "loss": 4.2032, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.28968421052631577, |
| "grad_norm": 0.10120591521263123, |
| "learning_rate": 0.00015790262172284646, |
| "loss": 3.9567, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.296421052631579, |
| "grad_norm": 0.21033401787281036, |
| "learning_rate": 0.00015640449438202248, |
| "loss": 3.9369, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3031578947368421, |
| "grad_norm": 0.06378967314958572, |
| "learning_rate": 0.00015490636704119852, |
| "loss": 3.6318, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3098947368421053, |
| "grad_norm": 0.042198359966278076, |
| "learning_rate": 0.00015340823970037455, |
| "loss": 4.1789, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.31663157894736843, |
| "grad_norm": 0.053648848086595535, |
| "learning_rate": 0.00015191011235955057, |
| "loss": 4.1562, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3233684210526316, |
| "grad_norm": 0.0808805301785469, |
| "learning_rate": 0.00015041198501872659, |
| "loss": 3.8883, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.33010526315789473, |
| "grad_norm": 0.13895294070243835, |
| "learning_rate": 0.00014891385767790263, |
| "loss": 3.9055, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3368421052631579, |
| "grad_norm": 0.11999215185642242, |
| "learning_rate": 0.00014741573033707865, |
| "loss": 3.6025, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.34357894736842104, |
| "grad_norm": 0.0969998687505722, |
| "learning_rate": 0.0001459176029962547, |
| "loss": 4.2401, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3503157894736842, |
| "grad_norm": 0.2578948438167572, |
| "learning_rate": 0.00014441947565543072, |
| "loss": 4.1355, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.35705263157894734, |
| "grad_norm": 0.067634217441082, |
| "learning_rate": 0.00014292134831460674, |
| "loss": 3.8735, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.36378947368421055, |
| "grad_norm": 0.1961352676153183, |
| "learning_rate": 0.0001414232209737828, |
| "loss": 3.7641, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3705263157894737, |
| "grad_norm": 0.07940343767404556, |
| "learning_rate": 0.0001399250936329588, |
| "loss": 3.5177, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.37726315789473686, |
| "grad_norm": 1.3029491901397705, |
| "learning_rate": 0.00013842696629213483, |
| "loss": 4.1854, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.10544762760400772, |
| "learning_rate": 0.00013692883895131088, |
| "loss": 4.3064, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.39073684210526316, |
| "grad_norm": 0.150394469499588, |
| "learning_rate": 0.0001354307116104869, |
| "loss": 3.9517, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3974736842105263, |
| "grad_norm": 0.06921563297510147, |
| "learning_rate": 0.00013393258426966294, |
| "loss": 3.8917, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.40421052631578946, |
| "grad_norm": 0.06402010470628738, |
| "learning_rate": 0.00013243445692883896, |
| "loss": 3.5635, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4109473684210526, |
| "grad_norm": 0.08918313682079315, |
| "learning_rate": 0.00013093632958801498, |
| "loss": 4.1197, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.41768421052631577, |
| "grad_norm": 0.054397523403167725, |
| "learning_rate": 0.000129438202247191, |
| "loss": 4.0442, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4244210526315789, |
| "grad_norm": 0.068702831864357, |
| "learning_rate": 0.00012794007490636705, |
| "loss": 3.7506, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.43115789473684213, |
| "grad_norm": 0.14575353264808655, |
| "learning_rate": 0.0001264419475655431, |
| "loss": 3.7359, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4378947368421053, |
| "grad_norm": 0.1481335461139679, |
| "learning_rate": 0.00012494382022471912, |
| "loss": 3.3705, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.44463157894736843, |
| "grad_norm": 0.06438197940587997, |
| "learning_rate": 0.00012344569288389514, |
| "loss": 4.0248, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4513684210526316, |
| "grad_norm": 0.38855019211769104, |
| "learning_rate": 0.00012194756554307116, |
| "loss": 4.0265, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.45810526315789474, |
| "grad_norm": 0.20793034136295319, |
| "learning_rate": 0.00012044943820224719, |
| "loss": 3.7305, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4648421052631579, |
| "grad_norm": 0.11011853814125061, |
| "learning_rate": 0.00011895131086142324, |
| "loss": 3.6933, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.47157894736842104, |
| "grad_norm": 0.06795340031385422, |
| "learning_rate": 0.00011745318352059926, |
| "loss": 3.3734, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4783157894736842, |
| "grad_norm": 0.07788679003715515, |
| "learning_rate": 0.00011595505617977529, |
| "loss": 3.9053, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.48505263157894735, |
| "grad_norm": 0.07339611649513245, |
| "learning_rate": 0.00011445692883895131, |
| "loss": 3.8685, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4917894736842105, |
| "grad_norm": 0.16048288345336914, |
| "learning_rate": 0.00011295880149812735, |
| "loss": 3.5673, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4985263157894737, |
| "grad_norm": 0.2596355974674225, |
| "learning_rate": 0.00011146067415730337, |
| "loss": 3.5684, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5052631578947369, |
| "grad_norm": 0.10115884989500046, |
| "learning_rate": 0.00010996254681647941, |
| "loss": 3.2226, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.13997367024421692, |
| "learning_rate": 0.00010846441947565545, |
| "loss": 3.8579, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5187368421052632, |
| "grad_norm": 0.08359155058860779, |
| "learning_rate": 0.00010696629213483147, |
| "loss": 3.8313, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5254736842105263, |
| "grad_norm": 0.2407791018486023, |
| "learning_rate": 0.0001054681647940075, |
| "loss": 3.5257, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5322105263157895, |
| "grad_norm": 0.34615418314933777, |
| "learning_rate": 0.00010397003745318352, |
| "loss": 3.5113, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5389473684210526, |
| "grad_norm": 0.06987264007329941, |
| "learning_rate": 0.00010247191011235954, |
| "loss": 3.1525, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5456842105263158, |
| "grad_norm": 0.07933894544839859, |
| "learning_rate": 0.00010097378277153558, |
| "loss": 3.718, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5524210526315789, |
| "grad_norm": 0.12424171715974808, |
| "learning_rate": 9.947565543071161e-05, |
| "loss": 3.6641, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5591578947368421, |
| "grad_norm": 0.2515564262866974, |
| "learning_rate": 9.797752808988764e-05, |
| "loss": 3.4268, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5658947368421052, |
| "grad_norm": 0.30851560831069946, |
| "learning_rate": 9.647940074906368e-05, |
| "loss": 3.3856, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5726315789473684, |
| "grad_norm": 0.05149822682142258, |
| "learning_rate": 9.49812734082397e-05, |
| "loss": 3.1259, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5793684210526315, |
| "grad_norm": 0.17960771918296814, |
| "learning_rate": 9.348314606741574e-05, |
| "loss": 3.6767, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5861052631578947, |
| "grad_norm": 0.17523854970932007, |
| "learning_rate": 9.198501872659176e-05, |
| "loss": 3.5995, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.592842105263158, |
| "grad_norm": 0.3186163008213043, |
| "learning_rate": 9.04868913857678e-05, |
| "loss": 3.3966, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5995789473684211, |
| "grad_norm": 0.21263690292835236, |
| "learning_rate": 8.898876404494383e-05, |
| "loss": 3.3526, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6063157894736843, |
| "grad_norm": 0.10399254411458969, |
| "learning_rate": 8.749063670411985e-05, |
| "loss": 3.0519, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6130526315789474, |
| "grad_norm": 0.13143524527549744, |
| "learning_rate": 8.599250936329589e-05, |
| "loss": 3.629, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6197894736842106, |
| "grad_norm": 0.15374666452407837, |
| "learning_rate": 8.449438202247192e-05, |
| "loss": 3.6895, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6265263157894737, |
| "grad_norm": 0.23757484555244446, |
| "learning_rate": 8.299625468164794e-05, |
| "loss": 3.3622, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6332631578947369, |
| "grad_norm": 0.1661984622478485, |
| "learning_rate": 8.149812734082397e-05, |
| "loss": 3.3248, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.08603614568710327, |
| "learning_rate": 8e-05, |
| "loss": 3.0086, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6467368421052632, |
| "grad_norm": 0.07694745808839798, |
| "learning_rate": 7.850187265917604e-05, |
| "loss": 3.5162, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6534736842105263, |
| "grad_norm": 0.16395558416843414, |
| "learning_rate": 7.700374531835206e-05, |
| "loss": 3.4812, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6602105263157895, |
| "grad_norm": 0.13817398250102997, |
| "learning_rate": 7.55056179775281e-05, |
| "loss": 3.2516, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6669473684210526, |
| "grad_norm": 0.25807198882102966, |
| "learning_rate": 7.400749063670413e-05, |
| "loss": 3.2101, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6736842105263158, |
| "grad_norm": 0.06848172843456268, |
| "learning_rate": 7.250936329588015e-05, |
| "loss": 2.93, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6804210526315789, |
| "grad_norm": 1.089575171470642, |
| "learning_rate": 7.101123595505618e-05, |
| "loss": 3.4925, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6871578947368421, |
| "grad_norm": 0.20126965641975403, |
| "learning_rate": 6.951310861423222e-05, |
| "loss": 3.4603, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6938947368421052, |
| "grad_norm": 0.21779027581214905, |
| "learning_rate": 6.801498127340824e-05, |
| "loss": 3.1723, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7006315789473684, |
| "grad_norm": 0.18239159882068634, |
| "learning_rate": 6.651685393258428e-05, |
| "loss": 3.1903, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7073684210526315, |
| "grad_norm": 0.06677573919296265, |
| "learning_rate": 6.50187265917603e-05, |
| "loss": 2.8445, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7141052631578947, |
| "grad_norm": 0.42619746923446655, |
| "learning_rate": 6.352059925093634e-05, |
| "loss": 3.4319, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7208421052631578, |
| "grad_norm": 0.12023507058620453, |
| "learning_rate": 6.202247191011237e-05, |
| "loss": 3.3826, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7275789473684211, |
| "grad_norm": 0.15099403262138367, |
| "learning_rate": 6.052434456928839e-05, |
| "loss": 3.1425, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7343157894736843, |
| "grad_norm": 0.3474717438220978, |
| "learning_rate": 5.902621722846442e-05, |
| "loss": 3.1279, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7410526315789474, |
| "grad_norm": 0.12225649505853653, |
| "learning_rate": 5.752808988764046e-05, |
| "loss": 2.9033, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7477894736842106, |
| "grad_norm": 0.19639068841934204, |
| "learning_rate": 5.6029962546816485e-05, |
| "loss": 3.3681, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7545263157894737, |
| "grad_norm": 0.10571427643299103, |
| "learning_rate": 5.453183520599251e-05, |
| "loss": 3.335, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7612631578947369, |
| "grad_norm": 0.5154901146888733, |
| "learning_rate": 5.3033707865168545e-05, |
| "loss": 3.0952, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.6122628450393677, |
| "learning_rate": 5.153558052434457e-05, |
| "loss": 3.1269, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7747368421052632, |
| "grad_norm": 0.19698569178581238, |
| "learning_rate": 5.00374531835206e-05, |
| "loss": 2.8233, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7814736842105263, |
| "grad_norm": 0.13018374145030975, |
| "learning_rate": 4.853932584269663e-05, |
| "loss": 3.3094, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7882105263157895, |
| "grad_norm": 0.09522128850221634, |
| "learning_rate": 4.704119850187266e-05, |
| "loss": 3.2765, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7949473684210526, |
| "grad_norm": 0.10098107159137726, |
| "learning_rate": 4.554307116104869e-05, |
| "loss": 3.0807, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8016842105263158, |
| "grad_norm": 0.18019132316112518, |
| "learning_rate": 4.404494382022472e-05, |
| "loss": 3.0332, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8084210526315789, |
| "grad_norm": 0.16289708018302917, |
| "learning_rate": 4.2546816479400754e-05, |
| "loss": 2.7374, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8151578947368421, |
| "grad_norm": 0.12666673958301544, |
| "learning_rate": 4.104868913857678e-05, |
| "loss": 3.2118, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8218947368421052, |
| "grad_norm": 0.16891352832317352, |
| "learning_rate": 3.955056179775281e-05, |
| "loss": 3.1902, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8286315789473684, |
| "grad_norm": 0.10958009213209152, |
| "learning_rate": 3.805243445692884e-05, |
| "loss": 2.9862, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8353684210526315, |
| "grad_norm": 0.10642745345830917, |
| "learning_rate": 3.655430711610487e-05, |
| "loss": 3.0052, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.05656813085079193, |
| "learning_rate": 3.50561797752809e-05, |
| "loss": 2.723, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8488421052631578, |
| "grad_norm": 0.08322717994451523, |
| "learning_rate": 3.355805243445693e-05, |
| "loss": 3.234, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8555789473684211, |
| "grad_norm": 0.13246551156044006, |
| "learning_rate": 3.2059925093632956e-05, |
| "loss": 3.212, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8623157894736843, |
| "grad_norm": 0.10225304961204529, |
| "learning_rate": 3.056179775280899e-05, |
| "loss": 2.9484, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8690526315789474, |
| "grad_norm": 0.19440552592277527, |
| "learning_rate": 2.9063670411985024e-05, |
| "loss": 2.9266, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8757894736842106, |
| "grad_norm": 0.08913037180900574, |
| "learning_rate": 2.7565543071161047e-05, |
| "loss": 2.6801, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8825263157894737, |
| "grad_norm": 0.10815408080816269, |
| "learning_rate": 2.606741573033708e-05, |
| "loss": 3.1505, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8892631578947369, |
| "grad_norm": 0.14371147751808167, |
| "learning_rate": 2.4569288389513108e-05, |
| "loss": 3.1293, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.1680973470211029, |
| "learning_rate": 2.3071161048689138e-05, |
| "loss": 2.8961, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9027368421052632, |
| "grad_norm": 0.19012019038200378, |
| "learning_rate": 2.157303370786517e-05, |
| "loss": 2.9096, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9094736842105263, |
| "grad_norm": 0.060957688838243484, |
| "learning_rate": 2.00749063670412e-05, |
| "loss": 2.6879, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9162105263157895, |
| "grad_norm": 0.15055014193058014, |
| "learning_rate": 1.857677902621723e-05, |
| "loss": 3.108, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9229473684210526, |
| "grad_norm": 0.1378874033689499, |
| "learning_rate": 1.707865168539326e-05, |
| "loss": 3.0428, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9296842105263158, |
| "grad_norm": 0.14901022613048553, |
| "learning_rate": 1.558052434456929e-05, |
| "loss": 2.8589, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9364210526315789, |
| "grad_norm": 0.17515867948532104, |
| "learning_rate": 1.4082397003745318e-05, |
| "loss": 2.8563, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9431578947368421, |
| "grad_norm": 0.11909812688827515, |
| "learning_rate": 1.258426966292135e-05, |
| "loss": 2.5759, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9498947368421052, |
| "grad_norm": 0.16348549723625183, |
| "learning_rate": 1.1086142322097379e-05, |
| "loss": 3.089, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9566315789473684, |
| "grad_norm": 0.08107765763998032, |
| "learning_rate": 9.588014981273409e-06, |
| "loss": 3.0145, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9633684210526315, |
| "grad_norm": 0.13251617550849915, |
| "learning_rate": 8.089887640449438e-06, |
| "loss": 2.8256, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.9701052631578947, |
| "grad_norm": 0.10319063812494278, |
| "learning_rate": 6.591760299625469e-06, |
| "loss": 2.8456, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.9768421052631578, |
| "grad_norm": 0.08950542658567429, |
| "learning_rate": 5.093632958801498e-06, |
| "loss": 2.605, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.983578947368421, |
| "grad_norm": 0.08379487693309784, |
| "learning_rate": 3.5955056179775286e-06, |
| "loss": 3.0334, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9903157894736843, |
| "grad_norm": 0.1561821848154068, |
| "learning_rate": 2.097378277153558e-06, |
| "loss": 3.0357, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9970526315789474, |
| "grad_norm": 0.07574011385440826, |
| "learning_rate": 5.992509363295881e-07, |
| "loss": 2.7458, |
| "step": 1480 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1484, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.32780044727799e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|