{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3250711093051605, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016253555465258026, "grad_norm": 2.077186346054077, "learning_rate": 4.9998691031433496e-05, "loss": 2.8778, "step": 5 }, { "epoch": 0.03250711093051605, "grad_norm": 2.073504686355591, "learning_rate": 4.999476426280588e-05, "loss": 2.6432, "step": 10 }, { "epoch": 0.048760666395774074, "grad_norm": 2.1281931400299072, "learning_rate": 4.998822010531848e-05, "loss": 2.3167, "step": 15 }, { "epoch": 0.0650142218610321, "grad_norm": 1.090219497680664, "learning_rate": 4.997905924425903e-05, "loss": 2.1533, "step": 20 }, { "epoch": 0.08126777732629012, "grad_norm": 1.007896900177002, "learning_rate": 4.996728263892985e-05, "loss": 2.0267, "step": 25 }, { "epoch": 0.09752133279154815, "grad_norm": 0.9982665777206421, "learning_rate": 4.995289152254744e-05, "loss": 1.9352, "step": 30 }, { "epoch": 0.11377488825680618, "grad_norm": 0.8844298720359802, "learning_rate": 4.9935887402113315e-05, "loss": 1.9486, "step": 35 }, { "epoch": 0.1300284437220642, "grad_norm": 0.9337536692619324, "learning_rate": 4.991627205825621e-05, "loss": 1.9228, "step": 40 }, { "epoch": 0.14628199918732224, "grad_norm": 0.9377800822257996, "learning_rate": 4.9894047545045605e-05, "loss": 1.835, "step": 45 }, { "epoch": 0.16253555465258024, "grad_norm": 0.8525241017341614, "learning_rate": 4.986921618977664e-05, "loss": 1.8157, "step": 50 }, { "epoch": 0.17878911011783827, "grad_norm": 0.872871458530426, "learning_rate": 4.984178059272638e-05, "loss": 1.8811, "step": 55 }, { "epoch": 0.1950426655830963, "grad_norm": 0.9125804305076599, "learning_rate": 4.981174362688158e-05, "loss": 1.8242, "step": 60 }, { "epoch": 0.21129622104835433, "grad_norm": 0.7474733591079712, "learning_rate": 4.977910843763777e-05, "loss": 1.7808, "step": 65 }, { "epoch": 0.22754977651361236, "grad_norm": 1.0248199701309204, "learning_rate": 4.974387844246987e-05, "loss": 1.8512, "step": 70 }, { "epoch": 0.24380333197887039, "grad_norm": 0.9032835960388184, "learning_rate": 4.970605733057441e-05, "loss": 1.8172, "step": 75 }, { "epoch": 0.2600568874441284, "grad_norm": 0.8550340533256531, "learning_rate": 4.9665649062483115e-05, "loss": 1.7418, "step": 80 }, { "epoch": 0.27631044290938644, "grad_norm": 0.9429016709327698, "learning_rate": 4.96226578696482e-05, "loss": 1.8265, "step": 85 }, { "epoch": 0.2925639983746445, "grad_norm": 0.975885272026062, "learning_rate": 4.957708825399927e-05, "loss": 1.7943, "step": 90 }, { "epoch": 0.3088175538399025, "grad_norm": 0.827629804611206, "learning_rate": 4.9528944987471884e-05, "loss": 1.8067, "step": 95 }, { "epoch": 0.3250711093051605, "grad_norm": 1.0019093751907349, "learning_rate": 4.9478233111507856e-05, "loss": 1.7874, "step": 100 } ], "logging_steps": 5, "max_steps": 1535, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.205535116204442e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }