| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 441, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.022675736961451247, |
| "grad_norm": 3.335511174270437, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9713, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.045351473922902494, |
| "grad_norm": 2.3040333943309217, |
| "learning_rate": 4.222222222222223e-06, |
| "loss": 0.8498, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06802721088435375, |
| "grad_norm": 2.3614246069249534, |
| "learning_rate": 6.444444444444445e-06, |
| "loss": 0.7652, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09070294784580499, |
| "grad_norm": 2.1960567488376763, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.7325, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11337868480725624, |
| "grad_norm": 2.085913789797008, |
| "learning_rate": 9.997482711915926e-06, |
| "loss": 0.6869, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1360544217687075, |
| "grad_norm": 2.1049834205536464, |
| "learning_rate": 9.969192322306271e-06, |
| "loss": 0.6767, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 1.9848038930108394, |
| "learning_rate": 9.909643486313533e-06, |
| "loss": 0.6773, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18140589569160998, |
| "grad_norm": 2.0608792070223814, |
| "learning_rate": 9.819210792799711e-06, |
| "loss": 0.6866, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 1.9287379321776654, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.6695, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22675736961451248, |
| "grad_norm": 1.7883594557608278, |
| "learning_rate": 9.548159976772593e-06, |
| "loss": 0.6705, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2494331065759637, |
| "grad_norm": 1.9101449860463477, |
| "learning_rate": 9.369246885348926e-06, |
| "loss": 0.6728, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.272108843537415, |
| "grad_norm": 1.8545319321700189, |
| "learning_rate": 9.162849273173857e-06, |
| "loss": 0.6611, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2947845804988662, |
| "grad_norm": 1.7226076367635828, |
| "learning_rate": 8.930265473713939e-06, |
| "loss": 0.6472, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 1.7366129387477443, |
| "learning_rate": 8.672958543287666e-06, |
| "loss": 0.6378, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3401360544217687, |
| "grad_norm": 1.8206937872347697, |
| "learning_rate": 8.392547057785662e-06, |
| "loss": 0.6548, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.36281179138321995, |
| "grad_norm": 1.7795719538545638, |
| "learning_rate": 8.090794931103026e-06, |
| "loss": 0.6417, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3854875283446712, |
| "grad_norm": 1.6985369927229417, |
| "learning_rate": 7.769600319330553e-06, |
| "loss": 0.6265, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 1.6792703345871458, |
| "learning_rate": 7.430983680502344e-06, |
| "loss": 0.6305, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4308390022675737, |
| "grad_norm": 1.6589676186929365, |
| "learning_rate": 7.0770750650094335e-06, |
| "loss": 0.6293, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.45351473922902497, |
| "grad_norm": 1.870540190373287, |
| "learning_rate": 6.710100716628345e-06, |
| "loss": 0.6384, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 1.7216866324584632, |
| "learning_rate": 6.332369068450175e-06, |
| "loss": 0.6282, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4988662131519274, |
| "grad_norm": 1.7007524294117728, |
| "learning_rate": 5.946256221802052e-06, |
| "loss": 0.6137, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5215419501133787, |
| "grad_norm": 1.7667641842852366, |
| "learning_rate": 5.5541909995050554e-06, |
| "loss": 0.6412, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.54421768707483, |
| "grad_norm": 1.7530294305813394, |
| "learning_rate": 5.15863966749034e-06, |
| "loss": 0.6002, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5668934240362812, |
| "grad_norm": 1.8152907726830068, |
| "learning_rate": 4.762090420881289e-06, |
| "loss": 0.6076, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5895691609977324, |
| "grad_norm": 1.4852622109755043, |
| "learning_rate": 4.367037732131254e-06, |
| "loss": 0.6218, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 1.514865756410016, |
| "learning_rate": 3.975966659674048e-06, |
| "loss": 0.6185, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 1.76707927121814, |
| "learning_rate": 3.5913372157928515e-06, |
| "loss": 0.604, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6575963718820862, |
| "grad_norm": 1.6535309089760444, |
| "learning_rate": 3.2155688920406415e-06, |
| "loss": 0.5929, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6802721088435374, |
| "grad_norm": 1.5052017620305476, |
| "learning_rate": 2.851025439554142e-06, |
| "loss": 0.592, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7029478458049887, |
| "grad_norm": 1.6528278663277294, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.5869, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7256235827664399, |
| "grad_norm": 1.6162422608500033, |
| "learning_rate": 2.1647006806861472e-06, |
| "loss": 0.5894, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7482993197278912, |
| "grad_norm": 1.696207014695853, |
| "learning_rate": 1.8472366645773892e-06, |
| "loss": 0.5908, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7709750566893424, |
| "grad_norm": 1.6180693092160032, |
| "learning_rate": 1.549604942589441e-06, |
| "loss": 0.5955, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 1.6781797741304556, |
| "learning_rate": 1.2736777516212267e-06, |
| "loss": 0.5974, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 1.5228384254508163, |
| "learning_rate": 1.0211907973458391e-06, |
| "loss": 0.5813, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8390022675736961, |
| "grad_norm": 1.506538574395671, |
| "learning_rate": 7.937323358440935e-07, |
| "loss": 0.5993, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8616780045351474, |
| "grad_norm": 1.6388313127356224, |
| "learning_rate": 5.927331827620902e-07, |
| "loss": 0.5981, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8843537414965986, |
| "grad_norm": 1.5230885794830482, |
| "learning_rate": 4.194577128396521e-07, |
| "loss": 0.5951, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9070294784580499, |
| "grad_norm": 1.6964084107708113, |
| "learning_rate": 2.7499590642665773e-07, |
| "loss": 0.5809, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9297052154195011, |
| "grad_norm": 1.772594630270358, |
| "learning_rate": 1.6025649301821877e-07, |
| "loss": 0.6161, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 1.6758084021033888, |
| "learning_rate": 7.59612349389599e-08, |
| "loss": 0.5964, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9750566893424036, |
| "grad_norm": 1.676267765187081, |
| "learning_rate": 2.264038713457706e-08, |
| "loss": 0.6167, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9977324263038548, |
| "grad_norm": 1.6550266971795458, |
| "learning_rate": 6.293616306246586e-10, |
| "loss": 0.5858, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 441, |
| "total_flos": 97286537363456.0, |
| "train_loss": 0.6426307206251183, |
| "train_runtime": 2167.4163, |
| "train_samples_per_second": 3.253, |
| "train_steps_per_second": 0.203 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 441, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 97286537363456.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|