{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9752133279154815, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016253555465258026, "grad_norm": 2.077186346054077, "learning_rate": 4.9998691031433496e-05, "loss": 2.8778, "step": 5 }, { "epoch": 0.03250711093051605, "grad_norm": 2.073504686355591, "learning_rate": 4.999476426280588e-05, "loss": 2.6432, "step": 10 }, { "epoch": 0.048760666395774074, "grad_norm": 2.1281931400299072, "learning_rate": 4.998822010531848e-05, "loss": 2.3167, "step": 15 }, { "epoch": 0.0650142218610321, "grad_norm": 1.090219497680664, "learning_rate": 4.997905924425903e-05, "loss": 2.1533, "step": 20 }, { "epoch": 0.08126777732629012, "grad_norm": 1.007896900177002, "learning_rate": 4.996728263892985e-05, "loss": 2.0267, "step": 25 }, { "epoch": 0.09752133279154815, "grad_norm": 0.9982665777206421, "learning_rate": 4.995289152254744e-05, "loss": 1.9352, "step": 30 }, { "epoch": 0.11377488825680618, "grad_norm": 0.8844298720359802, "learning_rate": 4.9935887402113315e-05, "loss": 1.9486, "step": 35 }, { "epoch": 0.1300284437220642, "grad_norm": 0.9337536692619324, "learning_rate": 4.991627205825621e-05, "loss": 1.9228, "step": 40 }, { "epoch": 0.14628199918732224, "grad_norm": 0.9377800822257996, "learning_rate": 4.9894047545045605e-05, "loss": 1.835, "step": 45 }, { "epoch": 0.16253555465258024, "grad_norm": 0.8525241017341614, "learning_rate": 4.986921618977664e-05, "loss": 1.8157, "step": 50 }, { "epoch": 0.17878911011783827, "grad_norm": 0.872871458530426, "learning_rate": 4.984178059272638e-05, "loss": 1.8811, "step": 55 }, { "epoch": 0.1950426655830963, "grad_norm": 0.9125804305076599, "learning_rate": 4.981174362688158e-05, "loss": 1.8242, "step": 60 }, { "epoch": 0.21129622104835433, "grad_norm": 0.7474733591079712, "learning_rate": 4.977910843763777e-05, "loss": 1.7808, "step": 65 }, { "epoch": 0.22754977651361236, "grad_norm": 1.0248199701309204, "learning_rate": 4.974387844246987e-05, "loss": 1.8512, "step": 70 }, { "epoch": 0.24380333197887039, "grad_norm": 0.9032835960388184, "learning_rate": 4.970605733057441e-05, "loss": 1.8172, "step": 75 }, { "epoch": 0.2600568874441284, "grad_norm": 0.8550340533256531, "learning_rate": 4.9665649062483115e-05, "loss": 1.7418, "step": 80 }, { "epoch": 0.27631044290938644, "grad_norm": 0.9429016709327698, "learning_rate": 4.96226578696482e-05, "loss": 1.8265, "step": 85 }, { "epoch": 0.2925639983746445, "grad_norm": 0.975885272026062, "learning_rate": 4.957708825399927e-05, "loss": 1.7943, "step": 90 }, { "epoch": 0.3088175538399025, "grad_norm": 0.827629804611206, "learning_rate": 4.9528944987471884e-05, "loss": 1.8067, "step": 95 }, { "epoch": 0.3250711093051605, "grad_norm": 1.0019093751907349, "learning_rate": 4.9478233111507856e-05, "loss": 1.7874, "step": 100 }, { "epoch": 0.3413246647704185, "grad_norm": 1.0811312198638916, "learning_rate": 4.9424957936527295e-05, "loss": 1.7395, "step": 105 }, { "epoch": 0.35757822023567654, "grad_norm": 0.9461565613746643, "learning_rate": 4.936912504137257e-05, "loss": 1.7833, "step": 110 }, { "epoch": 0.37383177570093457, "grad_norm": 1.0386009216308594, "learning_rate": 4.9310740272724055e-05, "loss": 1.7569, "step": 115 }, { "epoch": 0.3900853311661926, "grad_norm": 0.9916568994522095, "learning_rate": 4.924980974448791e-05, "loss": 1.7584, "step": 120 }, { "epoch": 0.4063388866314506, "grad_norm": 1.240400791168213, "learning_rate": 
4.918633983715581e-05, "loss": 1.7438, "step": 125 }, { "epoch": 0.42259244209670865, "grad_norm": 1.1441287994384766, "learning_rate": 4.912033719713687e-05, "loss": 1.7046, "step": 130 }, { "epoch": 0.4388459975619667, "grad_norm": 1.1687984466552734, "learning_rate": 4.905180873606157e-05, "loss": 1.738, "step": 135 }, { "epoch": 0.4550995530272247, "grad_norm": 0.9126319885253906, "learning_rate": 4.8980761630058014e-05, "loss": 1.7738, "step": 140 }, { "epoch": 0.47135310849248274, "grad_norm": 1.172781229019165, "learning_rate": 4.8907203319000495e-05, "loss": 1.6642, "step": 145 }, { "epoch": 0.48760666395774077, "grad_norm": 1.1217776536941528, "learning_rate": 4.883114150573037e-05, "loss": 1.7359, "step": 150 }, { "epoch": 0.5038602194229987, "grad_norm": 1.1024829149246216, "learning_rate": 4.8752584155249444e-05, "loss": 1.6707, "step": 155 }, { "epoch": 0.5201137748882568, "grad_norm": 1.042017936706543, "learning_rate": 4.8671539493885934e-05, "loss": 1.709, "step": 160 }, { "epoch": 0.5363673303535148, "grad_norm": 1.1834771633148193, "learning_rate": 4.8588016008432945e-05, "loss": 1.7282, "step": 165 }, { "epoch": 0.5526208858187729, "grad_norm": 1.0311099290847778, "learning_rate": 4.850202244525987e-05, "loss": 1.774, "step": 170 }, { "epoch": 0.5688744412840309, "grad_norm": 1.2995566129684448, "learning_rate": 4.8413567809396376e-05, "loss": 1.714, "step": 175 }, { "epoch": 0.585127996749289, "grad_norm": 1.23491370677948, "learning_rate": 4.8322661363589507e-05, "loss": 1.7717, "step": 180 }, { "epoch": 0.6013815522145469, "grad_norm": 1.1809489727020264, "learning_rate": 4.822931262733367e-05, "loss": 1.722, "step": 185 }, { "epoch": 0.617635107679805, "grad_norm": 0.988572359085083, "learning_rate": 4.813353137587377e-05, "loss": 1.7007, "step": 190 }, { "epoch": 0.633888663145063, "grad_norm": 1.0803310871124268, "learning_rate": 4.803532763918162e-05, "loss": 1.6824, "step": 195 }, { "epoch": 0.650142218610321, "grad_norm": 1.02713143825531, "learning_rate": 4.793471170090555e-05, "loss": 1.6453, "step": 200 }, { "epoch": 0.666395774075579, "grad_norm": 1.2023831605911255, "learning_rate": 4.783169409729362e-05, "loss": 1.6974, "step": 205 }, { "epoch": 0.682649329540837, "grad_norm": 1.0009326934814453, "learning_rate": 4.772628561609022e-05, "loss": 1.7094, "step": 210 }, { "epoch": 0.6989028850060951, "grad_norm": 1.269020438194275, "learning_rate": 4.761849729540643e-05, "loss": 1.6832, "step": 215 }, { "epoch": 0.7151564404713531, "grad_norm": 1.3129128217697144, "learning_rate": 4.7508340422564134e-05, "loss": 1.6847, "step": 220 }, { "epoch": 0.7314099959366112, "grad_norm": 1.0837377309799194, "learning_rate": 4.7395826532914054e-05, "loss": 1.7215, "step": 225 }, { "epoch": 0.7476635514018691, "grad_norm": 1.1211832761764526, "learning_rate": 4.728096740862778e-05, "loss": 1.7658, "step": 230 }, { "epoch": 0.7639171068671272, "grad_norm": 1.1422042846679688, "learning_rate": 4.716377507746397e-05, "loss": 1.7098, "step": 235 }, { "epoch": 0.7801706623323852, "grad_norm": 1.2925424575805664, "learning_rate": 4.704426181150884e-05, "loss": 1.7504, "step": 240 }, { "epoch": 0.7964242177976433, "grad_norm": 1.282771348953247, "learning_rate": 4.6922440125891064e-05, "loss": 1.72, "step": 245 }, { "epoch": 0.8126777732629012, "grad_norm": 1.2494529485702515, "learning_rate": 4.6798322777471216e-05, "loss": 1.6581, "step": 250 }, { "epoch": 0.8289313287281593, "grad_norm": 1.2736073732376099, "learning_rate": 4.667192276350591e-05, "loss": 1.6825, "step": 255 
}, { "epoch": 0.8451848841934173, "grad_norm": 1.1477563381195068, "learning_rate": 4.654325332028676e-05, "loss": 1.6889, "step": 260 }, { "epoch": 0.8614384396586753, "grad_norm": 1.2415803670883179, "learning_rate": 4.641232792175428e-05, "loss": 1.6586, "step": 265 }, { "epoch": 0.8776919951239334, "grad_norm": 1.3159024715423584, "learning_rate": 4.6279160278086994e-05, "loss": 1.7102, "step": 270 }, { "epoch": 0.8939455505891913, "grad_norm": 1.2742581367492676, "learning_rate": 4.614376433426565e-05, "loss": 1.7476, "step": 275 }, { "epoch": 0.9101991060544494, "grad_norm": 1.34221351146698, "learning_rate": 4.6006154268613014e-05, "loss": 1.7497, "step": 280 }, { "epoch": 0.9264526615197074, "grad_norm": 1.1669361591339111, "learning_rate": 4.586634449130911e-05, "loss": 1.6693, "step": 285 }, { "epoch": 0.9427062169849655, "grad_norm": 1.2765443325042725, "learning_rate": 4.572434964288226e-05, "loss": 1.5862, "step": 290 }, { "epoch": 0.9589597724502235, "grad_norm": 1.2429810762405396, "learning_rate": 4.55801845926759e-05, "loss": 1.6875, "step": 295 }, { "epoch": 0.9752133279154815, "grad_norm": 1.2103233337402344, "learning_rate": 4.543386443729157e-05, "loss": 1.7294, "step": 300 } ], "logging_steps": 5, "max_steps": 1535, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.547524620512133e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }