{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.31506849315068, "eval_steps": 400, "global_step": 14400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7123287671232876, "grad_norm": 2.024919271469116, "learning_rate": 5e-05, "loss": 3.8492, "step": 500 }, { "epoch": 3.4246575342465753, "grad_norm": 1.7762272357940674, "learning_rate": 4.6973365617433416e-05, "loss": 3.104, "step": 1000 }, { "epoch": 5.136986301369863, "grad_norm": 1.9453787803649902, "learning_rate": 4.394673123486683e-05, "loss": 2.7752, "step": 1500 }, { "epoch": 6.8493150684931505, "grad_norm": 1.9978324174880981, "learning_rate": 4.092009685230024e-05, "loss": 2.5478, "step": 2000 }, { "epoch": 8.561643835616438, "grad_norm": 1.8244125843048096, "learning_rate": 3.789346246973366e-05, "loss": 2.3579, "step": 2500 }, { "epoch": 10.273972602739725, "grad_norm": 2.0865862369537354, "learning_rate": 3.486682808716707e-05, "loss": 2.2151, "step": 3000 }, { "epoch": 11.986301369863014, "grad_norm": 2.022252321243286, "learning_rate": 3.184019370460048e-05, "loss": 2.0783, "step": 3500 }, { "epoch": 13.698630136986301, "grad_norm": 2.030592918395996, "learning_rate": 2.88135593220339e-05, "loss": 1.9546, "step": 4000 }, { "epoch": 15.41095890410959, "grad_norm": 2.101133346557617, "learning_rate": 2.5786924939467316e-05, "loss": 1.8636, "step": 4500 }, { "epoch": 17.123287671232877, "grad_norm": 2.0378260612487793, "learning_rate": 2.2760290556900726e-05, "loss": 1.7814, "step": 5000 }, { "epoch": 18.835616438356166, "grad_norm": 2.2219014167785645, "learning_rate": 1.9733656174334143e-05, "loss": 1.7052, "step": 5500 }, { "epoch": 20.54794520547945, "grad_norm": 2.211897373199463, "learning_rate": 1.6707021791767556e-05, "loss": 1.6337, "step": 6000 }, { "epoch": 22.26027397260274, "grad_norm": 2.0061228275299072, "learning_rate": 1.3680387409200971e-05, "loss": 1.5842, "step": 6500 }, { "epoch": 23.972602739726028, "grad_norm": 2.1824605464935303, "learning_rate": 1.0653753026634383e-05, "loss": 1.544, "step": 7000 }, { "epoch": 25.684931506849313, "grad_norm": 2.1209356784820557, "learning_rate": 7.627118644067798e-06, "loss": 1.5001, "step": 7500 }, { "epoch": 27.397260273972602, "grad_norm": 2.1307101249694824, "learning_rate": 4.600484261501211e-06, "loss": 1.4742, "step": 8000 }, { "epoch": 29.10958904109589, "grad_norm": 2.0766286849975586, "learning_rate": 1.5738498789346248e-06, "loss": 1.4553, "step": 8500 }, { "epoch": 30.82191780821918, "grad_norm": 2.4918978214263916, "learning_rate": 1.9858156028368796e-05, "loss": 1.6283, "step": 9000 }, { "epoch": 32.534246575342465, "grad_norm": 2.147444725036621, "learning_rate": 1.8085106382978724e-05, "loss": 1.5809, "step": 9500 }, { "epoch": 34.24657534246575, "grad_norm": 2.2365379333496094, "learning_rate": 1.6312056737588656e-05, "loss": 1.5219, "step": 10000 }, { "epoch": 35.95890410958904, "grad_norm": 2.2686374187469482, "learning_rate": 1.4539007092198581e-05, "loss": 1.4767, "step": 10500 }, { "epoch": 37.67123287671233, "grad_norm": 2.2740468978881836, "learning_rate": 1.2765957446808511e-05, "loss": 1.4248, "step": 11000 }, { "epoch": 39.38356164383562, "grad_norm": 2.060163736343384, "learning_rate": 1.0992907801418441e-05, "loss": 1.3853, "step": 11500 }, { "epoch": 41.0958904109589, "grad_norm": 2.2763679027557373, "learning_rate": 9.219858156028368e-06, "loss": 1.3555, "step": 12000 }, { "epoch": 42.80821917808219, "grad_norm": 2.2008297443389893, "learning_rate": 7.446808510638298e-06, "loss": 1.3242, "step": 12500 }, { "epoch": 44.52054794520548, "grad_norm": 2.5200419425964355, "learning_rate": 5.673758865248227e-06, "loss": 1.3008, "step": 13000 }, { "epoch": 46.23287671232877, "grad_norm": 2.2964625358581543, "learning_rate": 3.9007092198581565e-06, "loss": 1.2794, "step": 13500 }, { "epoch": 47.945205479452056, "grad_norm": 2.3451294898986816, "learning_rate": 2.1276595744680853e-06, "loss": 1.2691, "step": 14000 } ], "logging_steps": 500, "max_steps": 14600, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.0045885421584384e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }