{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0526315789473686,
  "eval_steps": 500,
  "global_step": 174,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017543859649122806,
      "grad_norm": 0.2277653039057954,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.0079,
      "step": 1
    },
    {
      "epoch": 0.03508771929824561,
      "grad_norm": 0.22929541131469036,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.0155,
      "step": 2
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 0.23320532182877252,
      "learning_rate": 5e-06,
      "loss": 1.042,
      "step": 3
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 0.2327235097386226,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.0147,
      "step": 4
    },
    {
      "epoch": 0.08771929824561403,
      "grad_norm": 0.22899683750189437,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.0155,
      "step": 5
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 0.2346984759899663,
      "learning_rate": 1e-05,
      "loss": 1.0471,
      "step": 6
    },
    {
      "epoch": 0.12280701754385964,
      "grad_norm": 0.22126116958454167,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 1.0295,
      "step": 7
    },
    {
      "epoch": 0.14035087719298245,
      "grad_norm": 0.21457628974392648,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.0319,
      "step": 8
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 0.21253345072366,
      "learning_rate": 1.5e-05,
      "loss": 1.0166,
      "step": 9
    },
    {
      "epoch": 0.17543859649122806,
      "grad_norm": 0.21642169088434604,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 1.0331,
      "step": 10
    },
    {
      "epoch": 0.19298245614035087,
      "grad_norm": 0.18849235769492945,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 1.0142,
      "step": 11
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 0.17642654464303906,
      "learning_rate": 2e-05,
      "loss": 0.9902,
      "step": 12
    },
    {
      "epoch": 0.22807017543859648,
      "grad_norm": 0.17187933882719988,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 1.017,
      "step": 13
    },
    {
      "epoch": 0.24561403508771928,
      "grad_norm": 0.17103598555992858,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.9751,
      "step": 14
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.16014487415950107,
      "learning_rate": 2.5e-05,
      "loss": 0.9881,
      "step": 15
    },
    {
      "epoch": 0.2807017543859649,
      "grad_norm": 0.14028695923022452,
      "learning_rate": 2.4998640395219987e-05,
      "loss": 0.9778,
      "step": 16
    },
    {
      "epoch": 0.2982456140350877,
      "grad_norm": 0.12551729140438972,
      "learning_rate": 2.499456187664396e-05,
      "loss": 0.9689,
      "step": 17
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 0.1251340971956454,
      "learning_rate": 2.4987765331499672e-05,
      "loss": 0.9429,
      "step": 18
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.1296210035785423,
      "learning_rate": 2.497825223828555e-05,
      "loss": 0.946,
      "step": 19
    },
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 0.11329484685623345,
      "learning_rate": 2.4966024666449125e-05,
      "loss": 0.9366,
      "step": 20
    },
    {
      "epoch": 0.3684210526315789,
      "grad_norm": 0.10321338855040195,
      "learning_rate": 2.495108527593681e-05,
      "loss": 0.9259,
      "step": 21
    },
    {
      "epoch": 0.38596491228070173,
      "grad_norm": 0.09404432805330766,
      "learning_rate": 2.493343731661529e-05,
      "loss": 0.9482,
      "step": 22
    },
    {
      "epoch": 0.40350877192982454,
      "grad_norm": 0.09243083734470846,
      "learning_rate": 2.4913084627564535e-05,
      "loss": 0.9065,
      "step": 23
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 0.08906226913721381,
      "learning_rate": 2.4890031636242685e-05,
      "loss": 0.8938,
      "step": 24
    },
    {
      "epoch": 0.43859649122807015,
      "grad_norm": 0.08389891645958811,
      "learning_rate": 2.486428335752288e-05,
      "loss": 0.916,
      "step": 25
    },
    {
      "epoch": 0.45614035087719296,
      "grad_norm": 0.08425667259579685,
      "learning_rate": 2.483584539260238e-05,
      "loss": 0.8779,
      "step": 26
    },
    {
      "epoch": 0.47368421052631576,
      "grad_norm": 0.07744492406671652,
      "learning_rate": 2.480472392778407e-05,
      "loss": 0.8834,
      "step": 27
    },
    {
      "epoch": 0.49122807017543857,
      "grad_norm": 0.08767059933162678,
      "learning_rate": 2.4770925733130725e-05,
      "loss": 0.9148,
      "step": 28
    },
    {
      "epoch": 0.5087719298245614,
      "grad_norm": 0.07726724919511256,
      "learning_rate": 2.473445816099226e-05,
      "loss": 0.9088,
      "step": 29
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.07836879806212735,
      "learning_rate": 2.4695329144406337e-05,
      "loss": 0.8944,
      "step": 30
    },
    {
      "epoch": 0.543859649122807,
      "grad_norm": 0.07205578520489196,
      "learning_rate": 2.465354719537264e-05,
      "loss": 0.8966,
      "step": 31
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 0.0732976565919632,
      "learning_rate": 2.460912140300119e-05,
      "loss": 0.8933,
      "step": 32
    },
    {
      "epoch": 0.5789473684210527,
      "grad_norm": 0.06413069817944542,
      "learning_rate": 2.4562061431535128e-05,
      "loss": 0.8687,
      "step": 33
    },
    {
      "epoch": 0.5964912280701754,
      "grad_norm": 0.061655130915948715,
      "learning_rate": 2.4512377518248398e-05,
      "loss": 0.8757,
      "step": 34
    },
    {
      "epoch": 0.6140350877192983,
      "grad_norm": 0.06005143040792398,
      "learning_rate": 2.4460080471218766e-05,
      "loss": 0.8763,
      "step": 35
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 0.059268901460994255,
      "learning_rate": 2.4405181666976646e-05,
      "loss": 0.8691,
      "step": 36
    },
    {
      "epoch": 0.6491228070175439,
      "grad_norm": 0.0632968952247683,
      "learning_rate": 2.43476930480303e-05,
      "loss": 0.876,
      "step": 37
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.05479140319445762,
      "learning_rate": 2.428762712026792e-05,
      "loss": 0.8682,
      "step": 38
    },
    {
      "epoch": 0.6842105263157895,
      "grad_norm": 0.05563512390428038,
      "learning_rate": 2.4224996950237093e-05,
      "loss": 0.8841,
      "step": 39
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 0.05247972036076721,
      "learning_rate": 2.4159816162302394e-05,
      "loss": 0.8787,
      "step": 40
    },
    {
      "epoch": 0.7192982456140351,
      "grad_norm": 0.061183366717530226,
      "learning_rate": 2.4092098935681556e-05,
      "loss": 0.8549,
      "step": 41
    },
    {
      "epoch": 0.7368421052631579,
      "grad_norm": 0.05375757865160034,
      "learning_rate": 2.402186000136098e-05,
      "loss": 0.8528,
      "step": 42
    },
    {
      "epoch": 0.7543859649122807,
      "grad_norm": 0.05057045758521559,
      "learning_rate": 2.39491146388912e-05,
      "loss": 0.8536,
      "step": 43
    },
    {
      "epoch": 0.7719298245614035,
      "grad_norm": 0.04649702726117417,
      "learning_rate": 2.387387867306302e-05,
      "loss": 0.8488,
      "step": 44
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.04787518315340449,
      "learning_rate": 2.379616847046505e-05,
      "loss": 0.8573,
      "step": 45
    },
    {
      "epoch": 0.8070175438596491,
      "grad_norm": 0.045213502324060025,
      "learning_rate": 2.371600093592335e-05,
      "loss": 0.8727,
      "step": 46
    },
    {
      "epoch": 0.8245614035087719,
      "grad_norm": 0.04738126021150567,
      "learning_rate": 2.3633393508824022e-05,
      "loss": 0.8633,
      "step": 47
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 0.043833518738075415,
      "learning_rate": 2.3548364159319513e-05,
      "loss": 0.868,
      "step": 48
    },
    {
      "epoch": 0.8596491228070176,
      "grad_norm": 0.042510793627425734,
      "learning_rate": 2.3460931384419427e-05,
      "loss": 0.852,
      "step": 49
    },
    {
      "epoch": 0.8771929824561403,
      "grad_norm": 0.0408258194280563,
      "learning_rate": 2.3371114203966756e-05,
      "loss": 0.8595,
      "step": 50
    },
    {
      "epoch": 0.8947368421052632,
      "grad_norm": 0.041284443987598174,
      "learning_rate": 2.3278932156500348e-05,
      "loss": 0.8701,
      "step": 51
    },
    {
      "epoch": 0.9122807017543859,
      "grad_norm": 0.04015399468491511,
      "learning_rate": 2.3184405295004592e-05,
      "loss": 0.8378,
      "step": 52
    },
    {
      "epoch": 0.9298245614035088,
      "grad_norm": 0.0470194493755998,
      "learning_rate": 2.3087554182547123e-05,
      "loss": 0.8522,
      "step": 53
    },
    {
      "epoch": 0.9473684210526315,
      "grad_norm": 0.040346271463175765,
      "learning_rate": 2.298839988780561e-05,
      "loss": 0.8571,
      "step": 54
    },
    {
      "epoch": 0.9649122807017544,
      "grad_norm": 0.03907150168014673,
      "learning_rate": 2.288696398048455e-05,
      "loss": 0.8389,
      "step": 55
    },
    {
      "epoch": 0.9824561403508771,
      "grad_norm": 0.03966173521753987,
      "learning_rate": 2.278326852662305e-05,
      "loss": 0.8473,
      "step": 56
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.03792422830368885,
      "learning_rate": 2.267733608379468e-05,
      "loss": 0.8308,
      "step": 57
    },
    {
      "epoch": 1.0175438596491229,
      "grad_norm": 0.039916995591762185,
      "learning_rate": 2.2569189696200327e-05,
      "loss": 0.8363,
      "step": 58
    },
    {
      "epoch": 1.0350877192982457,
      "grad_norm": 0.04222978922823946,
      "learning_rate": 2.2458852889655284e-05,
      "loss": 0.8248,
      "step": 59
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.037766008270626816,
      "learning_rate": 2.234634966647148e-05,
      "loss": 0.8108,
      "step": 60
    },
    {
      "epoch": 1.0701754385964912,
      "grad_norm": 0.03910137890933386,
      "learning_rate": 2.2231704500236117e-05,
      "loss": 0.8111,
      "step": 61
    },
    {
      "epoch": 1.087719298245614,
      "grad_norm": 0.03723938187511672,
      "learning_rate": 2.211494233048776e-05,
      "loss": 0.7961,
      "step": 62
    },
    {
      "epoch": 1.1052631578947367,
      "grad_norm": 0.03536618534586841,
      "learning_rate": 2.1996088557291062e-05,
      "loss": 0.8083,
      "step": 63
    },
    {
      "epoch": 1.1228070175438596,
      "grad_norm": 0.03509557198121232,
      "learning_rate": 2.1875169035711335e-05,
      "loss": 0.8301,
      "step": 64
    },
    {
      "epoch": 1.1403508771929824,
      "grad_norm": 0.03631360422996243,
      "learning_rate": 2.1752210070190106e-05,
      "loss": 0.8119,
      "step": 65
    },
    {
      "epoch": 1.1578947368421053,
      "grad_norm": 0.034923838848099804,
      "learning_rate": 2.162723840882293e-05,
      "loss": 0.8351,
      "step": 66
    },
    {
      "epoch": 1.1754385964912282,
      "grad_norm": 0.034222477158642954,
      "learning_rate": 2.150028123754072e-05,
      "loss": 0.8396,
      "step": 67
    },
    {
      "epoch": 1.1929824561403508,
      "grad_norm": 0.044099006560021574,
      "learning_rate": 2.137136617419578e-05,
      "loss": 0.8132,
      "step": 68
    },
    {
      "epoch": 1.2105263157894737,
      "grad_norm": 0.03760194901086737,
      "learning_rate": 2.1240521262553927e-05,
      "loss": 0.8277,
      "step": 69
    },
    {
      "epoch": 1.2280701754385965,
      "grad_norm": 0.034132230637497686,
      "learning_rate": 2.1107774966193932e-05,
      "loss": 0.8231,
      "step": 70
    },
    {
      "epoch": 1.2456140350877192,
      "grad_norm": 0.034145371494878535,
      "learning_rate": 2.097315616231564e-05,
      "loss": 0.8116,
      "step": 71
    },
    {
      "epoch": 1.263157894736842,
      "grad_norm": 0.03402745474331636,
      "learning_rate": 2.0836694135458136e-05,
      "loss": 0.8283,
      "step": 72
    },
    {
      "epoch": 1.280701754385965,
      "grad_norm": 0.046074062113807,
      "learning_rate": 2.0698418571129255e-05,
      "loss": 0.8161,
      "step": 73
    },
    {
      "epoch": 1.2982456140350878,
      "grad_norm": 0.03409175484451008,
      "learning_rate": 2.055835954934791e-05,
      "loss": 0.8056,
      "step": 74
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.03601398897730395,
      "learning_rate": 2.041654753810059e-05,
      "loss": 0.8139,
      "step": 75
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.035605249354124874,
      "learning_rate": 2.027301338671342e-05,
      "loss": 0.7993,
      "step": 76
    },
    {
      "epoch": 1.3508771929824561,
      "grad_norm": 0.03851629705577501,
      "learning_rate": 2.0127788319141345e-05,
      "loss": 0.8192,
      "step": 77
    },
    {
      "epoch": 1.368421052631579,
      "grad_norm": 0.03560123297297108,
      "learning_rate": 1.998090392717572e-05,
      "loss": 0.8194,
      "step": 78
    },
    {
      "epoch": 1.3859649122807016,
      "grad_norm": 0.03338440818080332,
      "learning_rate": 1.9832392163571977e-05,
      "loss": 0.823,
      "step": 79
    },
    {
      "epoch": 1.4035087719298245,
      "grad_norm": 0.039320599103418945,
      "learning_rate": 1.968228533509871e-05,
      "loss": 0.7991,
      "step": 80
    },
    {
      "epoch": 1.4210526315789473,
      "grad_norm": 0.03413703166680613,
      "learning_rate": 1.953061609550976e-05,
      "loss": 0.8122,
      "step": 81
    },
    {
      "epoch": 1.4385964912280702,
      "grad_norm": 0.03632660780988978,
      "learning_rate": 1.937741743844082e-05,
      "loss": 0.8051,
      "step": 82
    },
    {
      "epoch": 1.456140350877193,
      "grad_norm": 0.2720784291107051,
      "learning_rate": 1.9222722690232124e-05,
      "loss": 0.7982,
      "step": 83
    },
    {
      "epoch": 1.4736842105263157,
      "grad_norm": 0.033948193629099205,
      "learning_rate": 1.9066565502678735e-05,
      "loss": 0.8244,
      "step": 84
    },
    {
      "epoch": 1.4912280701754386,
      "grad_norm": 0.04015231276799685,
      "learning_rate": 1.8908979845710028e-05,
      "loss": 0.802,
      "step": 85
    },
    {
      "epoch": 1.5087719298245614,
      "grad_norm": 0.0334280910595663,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.7944,
      "step": 86
    },
    {
      "epoch": 1.526315789473684,
      "grad_norm": 0.033433004016842426,
      "learning_rate": 1.8589660549509958e-05,
      "loss": 0.8086,
      "step": 87
    },
    {
      "epoch": 1.543859649122807,
      "grad_norm": 0.03647767785675323,
      "learning_rate": 1.842799637396523e-05,
      "loss": 0.8005,
      "step": 88
    },
    {
      "epoch": 1.5614035087719298,
      "grad_norm": 0.034851294076943595,
      "learning_rate": 1.8265042641267543e-05,
      "loss": 0.7697,
      "step": 89
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.035934237530251795,
      "learning_rate": 1.8100834799844733e-05,
      "loss": 0.8017,
      "step": 90
    },
    {
      "epoch": 1.5964912280701755,
      "grad_norm": 0.035583328708530516,
      "learning_rate": 1.793540857093937e-05,
      "loss": 0.8035,
      "step": 91
    },
    {
      "epoch": 1.6140350877192984,
      "grad_norm": 0.035602801094138097,
      "learning_rate": 1.77687999408381e-05,
      "loss": 0.7785,
      "step": 92
    },
    {
      "epoch": 1.631578947368421,
      "grad_norm": 0.03338943294215932,
      "learning_rate": 1.760104515304331e-05,
      "loss": 0.809,
      "step": 93
    },
    {
      "epoch": 1.6491228070175439,
      "grad_norm": 0.03440455754366396,
      "learning_rate": 1.743218070038882e-05,
      "loss": 0.7835,
      "step": 94
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.03413755186702014,
      "learning_rate": 1.7262243317101342e-05,
      "loss": 0.7857,
      "step": 95
    },
    {
      "epoch": 1.6842105263157894,
      "grad_norm": 0.03450977935766268,
      "learning_rate": 1.709126997080946e-05,
      "loss": 0.8045,
      "step": 96
    },
    {
      "epoch": 1.7017543859649122,
      "grad_norm": 0.03521585021316178,
      "learning_rate": 1.6919297854501793e-05,
      "loss": 0.7935,
      "step": 97
    },
    {
      "epoch": 1.719298245614035,
      "grad_norm": 0.03493887488862163,
      "learning_rate": 1.674636437843616e-05,
      "loss": 0.798,
      "step": 98
    },
    {
      "epoch": 1.736842105263158,
      "grad_norm": 0.035529760900503735,
      "learning_rate": 1.6572507162001472e-05,
      "loss": 0.799,
      "step": 99
    },
    {
      "epoch": 1.7543859649122808,
      "grad_norm": 0.033513087382252796,
      "learning_rate": 1.6397764025534122e-05,
      "loss": 0.7894,
      "step": 100
    },
    {
      "epoch": 1.7719298245614035,
      "grad_norm": 0.19122994636460844,
      "learning_rate": 1.6222172982090696e-05,
      "loss": 0.7804,
      "step": 101
    },
    {
      "epoch": 1.7894736842105263,
      "grad_norm": 0.035807687623747184,
      "learning_rate": 1.604577222917871e-05,
      "loss": 0.7951,
      "step": 102
    },
    {
      "epoch": 1.807017543859649,
      "grad_norm": 0.03259107722786136,
      "learning_rate": 1.586860014044726e-05,
      "loss": 0.7781,
      "step": 103
    },
    {
      "epoch": 1.8245614035087718,
      "grad_norm": 0.036809691518080494,
      "learning_rate": 1.5690695257339348e-05,
      "loss": 0.8008,
      "step": 104
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.035038305488987835,
      "learning_rate": 1.551209628070768e-05,
      "loss": 0.7753,
      "step": 105
    },
    {
      "epoch": 1.8596491228070176,
      "grad_norm": 0.03563704680282362,
      "learning_rate": 1.5332842062395837e-05,
      "loss": 0.8109,
      "step": 106
    },
    {
      "epoch": 1.8771929824561404,
      "grad_norm": 0.03337909268033773,
      "learning_rate": 1.5152971596786539e-05,
      "loss": 0.8074,
      "step": 107
    },
    {
      "epoch": 1.8947368421052633,
      "grad_norm": 0.03364182376321439,
      "learning_rate": 1.4972524012318968e-05,
      "loss": 0.7814,
      "step": 108
    },
    {
      "epoch": 1.912280701754386,
      "grad_norm": 0.03394899793236395,
      "learning_rate": 1.4791538562976858e-05,
      "loss": 0.8046,
      "step": 109
    },
    {
      "epoch": 1.9298245614035088,
      "grad_norm": 0.03276078625186682,
      "learning_rate": 1.4610054619749335e-05,
      "loss": 0.7923,
      "step": 110
    },
    {
      "epoch": 1.9473684210526314,
      "grad_norm": 0.035403995130927304,
      "learning_rate": 1.442811166206628e-05,
      "loss": 0.8036,
      "step": 111
    },
    {
      "epoch": 1.9649122807017543,
      "grad_norm": 0.036133327261938186,
      "learning_rate": 1.4245749269210077e-05,
      "loss": 0.7875,
      "step": 112
    },
    {
      "epoch": 1.9824561403508771,
      "grad_norm": 0.036600172448979534,
      "learning_rate": 1.40630071117057e-05,
      "loss": 0.7697,
      "step": 113
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.037601350569099724,
      "learning_rate": 1.3879924942690875e-05,
      "loss": 0.8189,
      "step": 114
    },
    {
      "epoch": 2.017543859649123,
      "grad_norm": 0.03302530011499694,
      "learning_rate": 1.3696542589268343e-05,
      "loss": 0.7611,
      "step": 115
    },
    {
      "epoch": 2.0350877192982457,
      "grad_norm": 0.03512883427203999,
      "learning_rate": 1.3512899943842001e-05,
      "loss": 0.8027,
      "step": 116
    },
    {
      "epoch": 2.0526315789473686,
      "grad_norm": 0.032765823734136675,
      "learning_rate": 1.3329036955438801e-05,
      "loss": 0.7739,
      "step": 117
    },
    {
      "epoch": 2.0701754385964914,
      "grad_norm": 0.0325717443826363,
      "learning_rate": 1.3144993621018414e-05,
      "loss": 0.7749,
      "step": 118
    },
    {
      "epoch": 2.087719298245614,
      "grad_norm": 0.03351967483422044,
      "learning_rate": 1.2960809976772395e-05,
      "loss": 0.7776,
      "step": 119
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.03412426117127494,
      "learning_rate": 1.2776526089414836e-05,
      "loss": 0.7604,
      "step": 120
    },
    {
      "epoch": 2.1228070175438596,
      "grad_norm": 0.034817647751256633,
      "learning_rate": 1.2592182047466405e-05,
      "loss": 0.7701,
      "step": 121
    },
    {
      "epoch": 2.1403508771929824,
      "grad_norm": 0.03260110561553477,
      "learning_rate": 1.2407817952533594e-05,
      "loss": 0.7741,
      "step": 122
    },
    {
      "epoch": 2.1578947368421053,
      "grad_norm": 0.03193494436049472,
      "learning_rate": 1.2223473910585165e-05,
      "loss": 0.7645,
      "step": 123
    },
    {
      "epoch": 2.175438596491228,
      "grad_norm": 0.03312398775341158,
      "learning_rate": 1.2039190023227611e-05,
      "loss": 0.7585,
      "step": 124
    },
    {
      "epoch": 2.192982456140351,
      "grad_norm": 0.03343964045395972,
      "learning_rate": 1.1855006378981588e-05,
      "loss": 0.7921,
      "step": 125
    },
    {
      "epoch": 2.2105263157894735,
      "grad_norm": 0.033686542752496544,
      "learning_rate": 1.1670963044561205e-05,
      "loss": 0.7827,
      "step": 126
    },
    {
      "epoch": 2.2280701754385963,
      "grad_norm": 0.03497709388430689,
      "learning_rate": 1.1487100056158e-05,
      "loss": 0.7867,
      "step": 127
    },
    {
      "epoch": 2.245614035087719,
      "grad_norm": 0.03909574941588132,
      "learning_rate": 1.1303457410731658e-05,
      "loss": 0.7651,
      "step": 128
    },
    {
      "epoch": 2.263157894736842,
      "grad_norm": 0.03295131774552763,
      "learning_rate": 1.112007505730913e-05,
      "loss": 0.7716,
      "step": 129
    },
    {
      "epoch": 2.280701754385965,
      "grad_norm": 0.03535332720570186,
      "learning_rate": 1.0936992888294304e-05,
      "loss": 0.7519,
      "step": 130
    },
    {
      "epoch": 2.2982456140350878,
      "grad_norm": 0.034260157940805745,
      "learning_rate": 1.0754250730789925e-05,
      "loss": 0.7778,
      "step": 131
    },
    {
      "epoch": 2.3157894736842106,
      "grad_norm": 0.03778502943480454,
      "learning_rate": 1.057188833793372e-05,
      "loss": 0.7785,
      "step": 132
    },
    {
      "epoch": 2.3333333333333335,
      "grad_norm": 0.03282521968583762,
      "learning_rate": 1.0389945380250666e-05,
      "loss": 0.7822,
      "step": 133
    },
    {
      "epoch": 2.3508771929824563,
      "grad_norm": 0.03439059832810125,
      "learning_rate": 1.0208461437023146e-05,
      "loss": 0.7774,
      "step": 134
    },
    {
      "epoch": 2.3684210526315788,
      "grad_norm": 0.03579842875417821,
      "learning_rate": 1.0027475987681033e-05,
      "loss": 0.7626,
      "step": 135
    },
    {
      "epoch": 2.3859649122807016,
      "grad_norm": 0.04487526535583229,
      "learning_rate": 9.847028403213464e-06,
      "loss": 0.785,
      "step": 136
    },
    {
      "epoch": 2.4035087719298245,
      "grad_norm": 0.03582408613012423,
      "learning_rate": 9.667157937604165e-06,
      "loss": 0.772,
      "step": 137
    },
    {
      "epoch": 2.4210526315789473,
      "grad_norm": 0.035176969293327615,
      "learning_rate": 9.487903719292321e-06,
      "loss": 0.7777,
      "step": 138
    },
    {
      "epoch": 2.43859649122807,
      "grad_norm": 0.03346578608050542,
      "learning_rate": 9.309304742660656e-06,
      "loss": 0.7577,
      "step": 139
    },
    {
      "epoch": 2.456140350877193,
      "grad_norm": 0.039503770377912154,
      "learning_rate": 9.131399859552739e-06,
      "loss": 0.7901,
      "step": 140
    },
    {
      "epoch": 2.473684210526316,
      "grad_norm": 0.03677116076069421,
      "learning_rate": 8.954227770821292e-06,
      "loss": 0.7723,
      "step": 141
    },
    {
      "epoch": 2.4912280701754383,
      "grad_norm": 0.03338240458340637,
      "learning_rate": 8.77782701790931e-06,
      "loss": 0.7617,
      "step": 142
    },
    {
      "epoch": 2.5087719298245617,
      "grad_norm": 0.033837659971756015,
      "learning_rate": 8.60223597446588e-06,
      "loss": 0.7713,
      "step": 143
    },
    {
      "epoch": 2.526315789473684,
      "grad_norm": 0.03820384430127108,
      "learning_rate": 8.427492837998533e-06,
      "loss": 0.7557,
      "step": 144
    },
    {
      "epoch": 2.543859649122807,
      "grad_norm": 0.03252410748125962,
      "learning_rate": 8.25363562156384e-06,
      "loss": 0.7805,
      "step": 145
    },
    {
      "epoch": 2.56140350877193,
      "grad_norm": 0.03706815897768721,
      "learning_rate": 8.080702145498206e-06,
      "loss": 0.7645,
      "step": 146
    },
    {
      "epoch": 2.5789473684210527,
      "grad_norm": 0.035598168147733984,
      "learning_rate": 7.908730029190544e-06,
      "loss": 0.7877,
      "step": 147
    },
    {
      "epoch": 2.5964912280701755,
      "grad_norm": 0.0333688858025551,
      "learning_rate": 7.737756682898659e-06,
      "loss": 0.7591,
      "step": 148
    },
    {
      "epoch": 2.6140350877192984,
      "grad_norm": 0.044640638601682346,
      "learning_rate": 7.567819299611184e-06,
      "loss": 0.7658,
      "step": 149
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.033158357578421456,
      "learning_rate": 7.398954846956688e-06,
      "loss": 0.7719,
      "step": 150
    },
    {
      "epoch": 2.6491228070175437,
      "grad_norm": 0.033194259963536865,
      "learning_rate": 7.231200059161899e-06,
      "loss": 0.7806,
      "step": 151
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.03518173471203294,
      "learning_rate": 7.064591429060635e-06,
      "loss": 0.7679,
      "step": 152
    },
    {
      "epoch": 2.6842105263157894,
      "grad_norm": 0.03276207653785537,
      "learning_rate": 6.8991652001552695e-06,
      "loss": 0.7728,
      "step": 153
    },
    {
      "epoch": 2.7017543859649122,
      "grad_norm": 0.0347231674496661,
      "learning_rate": 6.734957358732458e-06,
      "loss": 0.7741,
      "step": 154
    },
    {
      "epoch": 2.719298245614035,
      "grad_norm": 0.033258910497780264,
      "learning_rate": 6.572003626034776e-06,
      "loss": 0.7728,
      "step": 155
    },
    {
      "epoch": 2.736842105263158,
      "grad_norm": 0.033923018163736,
      "learning_rate": 6.410339450490047e-06,
      "loss": 0.7838,
      "step": 156
    },
    {
      "epoch": 2.754385964912281,
      "grad_norm": 0.03499101652185436,
      "learning_rate": 6.250000000000003e-06,
      "loss": 0.7836,
      "step": 157
    },
    {
      "epoch": 2.7719298245614032,
      "grad_norm": 0.037455923489755065,
      "learning_rate": 6.091020154289971e-06,
      "loss": 0.786,
      "step": 158
    },
    {
      "epoch": 2.7894736842105265,
      "grad_norm": 0.03406798967875397,
      "learning_rate": 5.933434497321268e-06,
      "loss": 0.7607,
      "step": 159
    },
    {
      "epoch": 2.807017543859649,
      "grad_norm": 0.03263055408168355,
      "learning_rate": 5.777277309767873e-06,
      "loss": 0.7835,
      "step": 160
    },
    {
      "epoch": 2.824561403508772,
      "grad_norm": 0.0370087111535257,
      "learning_rate": 5.62258256155918e-06,
      "loss": 0.7506,
      "step": 161
    },
    {
      "epoch": 2.8421052631578947,
      "grad_norm": 0.03253506069052104,
      "learning_rate": 5.469383904490243e-06,
      "loss": 0.7849,
      "step": 162
    },
    {
      "epoch": 2.8596491228070176,
      "grad_norm": 0.034164574715283676,
      "learning_rate": 5.317714664901289e-06,
      "loss": 0.7665,
      "step": 163
    },
    {
      "epoch": 2.8771929824561404,
      "grad_norm": 0.03375521327460909,
      "learning_rate": 5.167607836428023e-06,
      "loss": 0.7497,
      "step": 164
    },
    {
      "epoch": 2.8947368421052633,
      "grad_norm": 0.035848240948616814,
      "learning_rate": 5.0190960728242834e-06,
      "loss": 0.7904,
      "step": 165
    },
    {
      "epoch": 2.912280701754386,
      "grad_norm": 0.03266391216652421,
      "learning_rate": 4.872211680858662e-06,
      "loss": 0.7592,
      "step": 166
    },
    {
      "epoch": 2.9298245614035086,
      "grad_norm": 0.03349205497732682,
      "learning_rate": 4.726986613286583e-06,
      "loss": 0.7666,
      "step": 167
    },
    {
      "epoch": 2.9473684210526314,
      "grad_norm": 0.034893605290688134,
      "learning_rate": 4.5834524618994106e-06,
      "loss": 0.7676,
      "step": 168
    },
    {
      "epoch": 2.9649122807017543,
      "grad_norm": 0.034664258032328234,
      "learning_rate": 4.441640450652093e-06,
      "loss": 0.7675,
      "step": 169
    },
    {
      "epoch": 2.982456140350877,
      "grad_norm": 0.032567147685664447,
      "learning_rate": 4.30158142887075e-06,
      "loss": 0.7607,
      "step": 170
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.034965409248096775,
      "learning_rate": 4.163305864541865e-06,
      "loss": 0.7622,
      "step": 171
    },
    {
      "epoch": 3.017543859649123,
      "grad_norm": 0.034735445589064905,
      "learning_rate": 4.026843837684359e-06,
      "loss": 0.7767,
      "step": 172
    },
    {
      "epoch": 3.0350877192982457,
      "grad_norm": 0.061447912336751793,
      "learning_rate": 3.89222503380607e-06,
      "loss": 0.7423,
      "step": 173
    },
    {
      "epoch": 3.0526315789473686,
      "grad_norm": 0.03296491017047373,
      "learning_rate": 3.7594787374460747e-06,
      "loss": 0.7608,
      "step": 174
    }
  ],
  "logging_steps": 1,
  "max_steps": 228,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 29,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1115148672434176e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}