| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 292, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.017123287671232876, |
| "grad_norm": 1.0587148666381836, |
| "learning_rate": 1.6438356164383561e-06, |
| "loss": 1.2908, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03424657534246575, |
| "grad_norm": 0.957391619682312, |
| "learning_rate": 3.6986301369863014e-06, |
| "loss": 1.408, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05136986301369863, |
| "grad_norm": 0.8232783675193787, |
| "learning_rate": 5.753424657534246e-06, |
| "loss": 1.2972, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0684931506849315, |
| "grad_norm": 0.707105278968811, |
| "learning_rate": 7.808219178082192e-06, |
| "loss": 1.2907, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08561643835616438, |
| "grad_norm": 0.5033673048019409, |
| "learning_rate": 9.863013698630136e-06, |
| "loss": 1.28, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.10273972602739725, |
| "grad_norm": 0.51893150806427, |
| "learning_rate": 1.1917808219178083e-05, |
| "loss": 1.2497, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11986301369863013, |
| "grad_norm": 0.5541372299194336, |
| "learning_rate": 1.3972602739726027e-05, |
| "loss": 1.2018, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.136986301369863, |
| "grad_norm": 0.4447920620441437, |
| "learning_rate": 1.6027397260273974e-05, |
| "loss": 1.218, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1541095890410959, |
| "grad_norm": 0.5761701464653015, |
| "learning_rate": 1.8082191780821916e-05, |
| "loss": 1.2215, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.17123287671232876, |
| "grad_norm": 0.46446430683135986, |
| "learning_rate": 2.0136986301369863e-05, |
| "loss": 1.1981, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18835616438356165, |
| "grad_norm": 0.4923893213272095, |
| "learning_rate": 2.219178082191781e-05, |
| "loss": 1.2212, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2054794520547945, |
| "grad_norm": 0.4145517945289612, |
| "learning_rate": 2.4246575342465755e-05, |
| "loss": 1.1524, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2226027397260274, |
| "grad_norm": 0.5622988939285278, |
| "learning_rate": 2.6301369863013698e-05, |
| "loss": 1.15, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.23972602739726026, |
| "grad_norm": 0.45440879464149475, |
| "learning_rate": 2.8356164383561644e-05, |
| "loss": 1.1336, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2568493150684932, |
| "grad_norm": 0.5431708693504333, |
| "learning_rate": 2.999996152240661e-05, |
| "loss": 1.1332, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.273972602739726, |
| "grad_norm": 0.5097510814666748, |
| "learning_rate": 2.9998614827365136e-05, |
| "loss": 1.0534, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2910958904109589, |
| "grad_norm": 0.523253858089447, |
| "learning_rate": 2.999534445005289e-05, |
| "loss": 1.0262, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3082191780821918, |
| "grad_norm": 0.5461484789848328, |
| "learning_rate": 2.9990150809919714e-05, |
| "loss": 1.0322, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3253424657534247, |
| "grad_norm": 0.5159561634063721, |
| "learning_rate": 2.998303457308803e-05, |
| "loss": 1.0268, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3424657534246575, |
| "grad_norm": 0.5679484009742737, |
| "learning_rate": 2.997399665226736e-05, |
| "loss": 1.0459, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3595890410958904, |
| "grad_norm": 0.659304678440094, |
| "learning_rate": 2.9963038206637277e-05, |
| "loss": 1.0858, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3767123287671233, |
| "grad_norm": 0.5805976390838623, |
| "learning_rate": 2.9950160641698755e-05, |
| "loss": 1.0286, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3938356164383562, |
| "grad_norm": 0.6749956011772156, |
| "learning_rate": 2.993536560909387e-05, |
| "loss": 0.9836, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.410958904109589, |
| "grad_norm": 0.6401397585868835, |
| "learning_rate": 2.991865500639398e-05, |
| "loss": 0.996, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4280821917808219, |
| "grad_norm": 0.6153554916381836, |
| "learning_rate": 2.990003097685634e-05, |
| "loss": 0.9677, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4452054794520548, |
| "grad_norm": 0.6153773069381714, |
| "learning_rate": 2.987949590914923e-05, |
| "loss": 0.9196, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4623287671232877, |
| "grad_norm": 0.6787427067756653, |
| "learning_rate": 2.985705243704559e-05, |
| "loss": 0.9613, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4794520547945205, |
| "grad_norm": 0.6827302575111389, |
| "learning_rate": 2.9832703439085174e-05, |
| "loss": 0.9559, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4965753424657534, |
| "grad_norm": 0.7343211770057678, |
| "learning_rate": 2.9806452038205437e-05, |
| "loss": 0.9245, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5136986301369864, |
| "grad_norm": 0.7253044247627258, |
| "learning_rate": 2.977830160134091e-05, |
| "loss": 0.9052, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5308219178082192, |
| "grad_norm": 0.7713239789009094, |
| "learning_rate": 2.974825573899144e-05, |
| "loss": 0.9172, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.547945205479452, |
| "grad_norm": 0.7671046257019043, |
| "learning_rate": 2.9716318304759057e-05, |
| "loss": 0.9238, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.565068493150685, |
| "grad_norm": 0.8231728076934814, |
| "learning_rate": 2.9682493394853763e-05, |
| "loss": 0.9248, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5821917808219178, |
| "grad_norm": 0.8311623930931091, |
| "learning_rate": 2.9646785347568143e-05, |
| "loss": 0.8765, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5993150684931506, |
| "grad_norm": 0.7531796097755432, |
| "learning_rate": 2.9609198742720957e-05, |
| "loss": 0.8939, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6164383561643836, |
| "grad_norm": 1.0523626804351807, |
| "learning_rate": 2.9569738401069728e-05, |
| "loss": 0.8488, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6335616438356164, |
| "grad_norm": 0.9050348401069641, |
| "learning_rate": 2.9528409383692465e-05, |
| "loss": 0.8467, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6506849315068494, |
| "grad_norm": 0.8227254748344421, |
| "learning_rate": 2.948521699133853e-05, |
| "loss": 0.83, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6678082191780822, |
| "grad_norm": 0.8400980830192566, |
| "learning_rate": 2.9440166763748782e-05, |
| "loss": 0.7697, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.684931506849315, |
| "grad_norm": 0.854492723941803, |
| "learning_rate": 2.9393264478945073e-05, |
| "loss": 0.8413, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.702054794520548, |
| "grad_norm": 0.9226430654525757, |
| "learning_rate": 2.934451615248915e-05, |
| "loss": 0.8085, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7191780821917808, |
| "grad_norm": 0.8370131850242615, |
| "learning_rate": 2.929392803671114e-05, |
| "loss": 0.8028, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7363013698630136, |
| "grad_norm": 0.927441418170929, |
| "learning_rate": 2.9241506619907636e-05, |
| "loss": 0.8509, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7534246575342466, |
| "grad_norm": 0.9028705358505249, |
| "learning_rate": 2.9187258625509518e-05, |
| "loss": 0.7967, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7705479452054794, |
| "grad_norm": 0.8795896768569946, |
| "learning_rate": 2.9131191011219634e-05, |
| "loss": 0.7865, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7876712328767124, |
| "grad_norm": 0.8585197329521179, |
| "learning_rate": 2.907331096812041e-05, |
| "loss": 0.764, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8047945205479452, |
| "grad_norm": 0.8900719881057739, |
| "learning_rate": 2.9013625919751557e-05, |
| "loss": 0.8205, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.821917808219178, |
| "grad_norm": 0.9867483377456665, |
| "learning_rate": 2.8952143521157933e-05, |
| "loss": 0.7868, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.839041095890411, |
| "grad_norm": 1.04231595993042, |
| "learning_rate": 2.888887165790775e-05, |
| "loss": 0.7418, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8561643835616438, |
| "grad_norm": 0.9595353007316589, |
| "learning_rate": 2.8823818445081152e-05, |
| "loss": 0.7532, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8732876712328768, |
| "grad_norm": 1.2640131711959839, |
| "learning_rate": 2.8756992226229443e-05, |
| "loss": 0.6791, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8904109589041096, |
| "grad_norm": 1.0312385559082031, |
| "learning_rate": 2.8688401572304927e-05, |
| "loss": 0.7609, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9075342465753424, |
| "grad_norm": 0.9013313055038452, |
| "learning_rate": 2.8618055280561656e-05, |
| "loss": 0.7723, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9246575342465754, |
| "grad_norm": 1.1069859266281128, |
| "learning_rate": 2.854596237342708e-05, |
| "loss": 0.6889, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9417808219178082, |
| "grad_norm": 1.0332902669906616, |
| "learning_rate": 2.8472132097344877e-05, |
| "loss": 0.7521, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.958904109589041, |
| "grad_norm": 1.0214784145355225, |
| "learning_rate": 2.839657392158904e-05, |
| "loss": 0.6667, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.976027397260274, |
| "grad_norm": 1.0733942985534668, |
| "learning_rate": 2.8319297537049338e-05, |
| "loss": 0.6784, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9931506849315068, |
| "grad_norm": 0.9576444029808044, |
| "learning_rate": 2.8240312854988424e-05, |
| "loss": 0.7012, |
| "step": 290 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.2721250868881e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|