{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.061794414735591205, "eval_steps": 62, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030897207367795603, "grad_norm": 9338.8359375, "learning_rate": 4e-12, "loss": 13.2844, "step": 1 }, { "epoch": 0.006179441473559121, "grad_norm": 9144.1259765625, "learning_rate": 8e-12, "loss": 12.994, "step": 2 }, { "epoch": 0.009269162210338681, "grad_norm": 9721.224609375, "learning_rate": 1.1999999999999999e-11, "loss": 12.6655, "step": 3 }, { "epoch": 0.012358882947118241, "grad_norm": 8743.544921875, "learning_rate": 1.6e-11, "loss": 13.5894, "step": 4 }, { "epoch": 0.015448603683897801, "grad_norm": 9426.9677734375, "learning_rate": 2e-11, "loss": 12.8587, "step": 5 }, { "epoch": 0.018538324420677363, "grad_norm": 9453.294921875, "learning_rate": 1.998481516433316e-11, "loss": 12.4593, "step": 6 }, { "epoch": 0.02162804515745692, "grad_norm": 9499.736328125, "learning_rate": 1.9939306773179494e-11, "loss": 12.7375, "step": 7 }, { "epoch": 0.024717765894236483, "grad_norm": 9644.2998046875, "learning_rate": 1.9863613034027223e-11, "loss": 12.5361, "step": 8 }, { "epoch": 0.027807486631016044, "grad_norm": 9461.044921875, "learning_rate": 1.9757963826274354e-11, "loss": 12.4943, "step": 9 }, { "epoch": 0.030897207367795602, "grad_norm": 10506.08203125, "learning_rate": 1.96226800030925e-11, "loss": 11.0811, "step": 10 }, { "epoch": 0.03398692810457516, "grad_norm": 9562.708984375, "learning_rate": 1.9458172417006344e-11, "loss": 12.1782, "step": 11 }, { "epoch": 0.037076648841354726, "grad_norm": 8141.35791015625, "learning_rate": 1.9264940672148017e-11, "loss": 14.1307, "step": 12 }, { "epoch": 0.040166369578134284, "grad_norm": 10255.396484375, "learning_rate": 1.9043571606975776e-11, "loss": 12.0333, "step": 13 }, { "epoch": 0.04325609031491384, "grad_norm": 9540.162109375, "learning_rate": 1.879473751206489e-11, "loss": 12.4084, "step": 14 }, { "epoch": 0.04634581105169341, "grad_norm": 9645.5986328125, "learning_rate": 1.851919408838327e-11, "loss": 13.0228, "step": 15 }, { "epoch": 0.049435531788472965, "grad_norm": 10008.0263671875, "learning_rate": 1.821777815225245e-11, "loss": 11.6848, "step": 16 }, { "epoch": 0.052525252525252523, "grad_norm": 10316.1103515625, "learning_rate": 1.7891405093963936e-11, "loss": 11.8311, "step": 17 }, { "epoch": 0.05561497326203209, "grad_norm": 8724.0751953125, "learning_rate": 1.754106609776896e-11, "loss": 13.4673, "step": 18 }, { "epoch": 0.05870469399881165, "grad_norm": 9629.205078125, "learning_rate": 1.7167825131684512e-11, "loss": 12.7426, "step": 19 }, { "epoch": 0.061794414735591205, "grad_norm": 9684.396484375, "learning_rate": 1.6772815716257412e-11, "loss": 11.9996, "step": 20 } ], "logging_steps": 1, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.318758614499328e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }