{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.650142218610321, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016253555465258026, "grad_norm": 2.077186346054077, "learning_rate": 4.9998691031433496e-05, "loss": 2.8778, "step": 5 }, { "epoch": 0.03250711093051605, "grad_norm": 2.073504686355591, "learning_rate": 4.999476426280588e-05, "loss": 2.6432, "step": 10 }, { "epoch": 0.048760666395774074, "grad_norm": 2.1281931400299072, "learning_rate": 4.998822010531848e-05, "loss": 2.3167, "step": 15 }, { "epoch": 0.0650142218610321, "grad_norm": 1.090219497680664, "learning_rate": 4.997905924425903e-05, "loss": 2.1533, "step": 20 }, { "epoch": 0.08126777732629012, "grad_norm": 1.007896900177002, "learning_rate": 4.996728263892985e-05, "loss": 2.0267, "step": 25 }, { "epoch": 0.09752133279154815, "grad_norm": 0.9982665777206421, "learning_rate": 4.995289152254744e-05, "loss": 1.9352, "step": 30 }, { "epoch": 0.11377488825680618, "grad_norm": 0.8844298720359802, "learning_rate": 4.9935887402113315e-05, "loss": 1.9486, "step": 35 }, { "epoch": 0.1300284437220642, "grad_norm": 0.9337536692619324, "learning_rate": 4.991627205825621e-05, "loss": 1.9228, "step": 40 }, { "epoch": 0.14628199918732224, "grad_norm": 0.9377800822257996, "learning_rate": 4.9894047545045605e-05, "loss": 1.835, "step": 45 }, { "epoch": 0.16253555465258024, "grad_norm": 0.8525241017341614, "learning_rate": 4.986921618977664e-05, "loss": 1.8157, "step": 50 }, { "epoch": 0.17878911011783827, "grad_norm": 0.872871458530426, "learning_rate": 4.984178059272638e-05, "loss": 1.8811, "step": 55 }, { "epoch": 0.1950426655830963, "grad_norm": 0.9125804305076599, "learning_rate": 4.981174362688158e-05, "loss": 1.8242, "step": 60 }, { "epoch": 0.21129622104835433, "grad_norm": 0.7474733591079712, "learning_rate": 4.977910843763777e-05, "loss": 1.7808, "step": 65 }, { "epoch": 0.22754977651361236, "grad_norm": 1.0248199701309204, "learning_rate": 4.974387844246987e-05, "loss": 1.8512, "step": 70 }, { "epoch": 0.24380333197887039, "grad_norm": 0.9032835960388184, "learning_rate": 4.970605733057441e-05, "loss": 1.8172, "step": 75 }, { "epoch": 0.2600568874441284, "grad_norm": 0.8550340533256531, "learning_rate": 4.9665649062483115e-05, "loss": 1.7418, "step": 80 }, { "epoch": 0.27631044290938644, "grad_norm": 0.9429016709327698, "learning_rate": 4.96226578696482e-05, "loss": 1.8265, "step": 85 }, { "epoch": 0.2925639983746445, "grad_norm": 0.975885272026062, "learning_rate": 4.957708825399927e-05, "loss": 1.7943, "step": 90 }, { "epoch": 0.3088175538399025, "grad_norm": 0.827629804611206, "learning_rate": 4.9528944987471884e-05, "loss": 1.8067, "step": 95 }, { "epoch": 0.3250711093051605, "grad_norm": 1.0019093751907349, "learning_rate": 4.9478233111507856e-05, "loss": 1.7874, "step": 100 }, { "epoch": 0.3413246647704185, "grad_norm": 1.0811312198638916, "learning_rate": 4.9424957936527295e-05, "loss": 1.7395, "step": 105 }, { "epoch": 0.35757822023567654, "grad_norm": 0.9461565613746643, "learning_rate": 4.936912504137257e-05, "loss": 1.7833, "step": 110 }, { "epoch": 0.37383177570093457, "grad_norm": 1.0386009216308594, "learning_rate": 4.9310740272724055e-05, "loss": 1.7569, "step": 115 }, { "epoch": 0.3900853311661926, "grad_norm": 0.9916568994522095, "learning_rate": 4.924980974448791e-05, "loss": 1.7584, "step": 120 }, { "epoch": 0.4063388866314506, "grad_norm": 1.240400791168213, "learning_rate": 4.918633983715581e-05, "loss": 1.7438, "step": 125 }, { "epoch": 0.42259244209670865, "grad_norm": 1.1441287994384766, "learning_rate": 4.912033719713687e-05, "loss": 1.7046, "step": 130 }, { "epoch": 0.4388459975619667, "grad_norm": 1.1687984466552734, "learning_rate": 4.905180873606157e-05, "loss": 1.738, "step": 135 }, { "epoch": 0.4550995530272247, "grad_norm": 0.9126319885253906, "learning_rate": 4.8980761630058014e-05, "loss": 1.7738, "step": 140 }, { "epoch": 0.47135310849248274, "grad_norm": 1.172781229019165, "learning_rate": 4.8907203319000495e-05, "loss": 1.6642, "step": 145 }, { "epoch": 0.48760666395774077, "grad_norm": 1.1217776536941528, "learning_rate": 4.883114150573037e-05, "loss": 1.7359, "step": 150 }, { "epoch": 0.5038602194229987, "grad_norm": 1.1024829149246216, "learning_rate": 4.8752584155249444e-05, "loss": 1.6707, "step": 155 }, { "epoch": 0.5201137748882568, "grad_norm": 1.042017936706543, "learning_rate": 4.8671539493885934e-05, "loss": 1.709, "step": 160 }, { "epoch": 0.5363673303535148, "grad_norm": 1.1834771633148193, "learning_rate": 4.8588016008432945e-05, "loss": 1.7282, "step": 165 }, { "epoch": 0.5526208858187729, "grad_norm": 1.0311099290847778, "learning_rate": 4.850202244525987e-05, "loss": 1.774, "step": 170 }, { "epoch": 0.5688744412840309, "grad_norm": 1.2995566129684448, "learning_rate": 4.8413567809396376e-05, "loss": 1.714, "step": 175 }, { "epoch": 0.585127996749289, "grad_norm": 1.23491370677948, "learning_rate": 4.8322661363589507e-05, "loss": 1.7717, "step": 180 }, { "epoch": 0.6013815522145469, "grad_norm": 1.1809489727020264, "learning_rate": 4.822931262733367e-05, "loss": 1.722, "step": 185 }, { "epoch": 0.617635107679805, "grad_norm": 0.988572359085083, "learning_rate": 4.813353137587377e-05, "loss": 1.7007, "step": 190 }, { "epoch": 0.633888663145063, "grad_norm": 1.0803310871124268, "learning_rate": 4.803532763918162e-05, "loss": 1.6824, "step": 195 }, { "epoch": 0.650142218610321, "grad_norm": 1.02713143825531, "learning_rate": 4.793471170090555e-05, "loss": 1.6453, "step": 200 } ], "logging_steps": 5, "max_steps": 1535, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0362791277468058e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }