{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2718600953895072, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03179650238473768, "grad_norm": 2.815408945083618, "learning_rate": 4.998609830681734e-05, "loss": 2.9252, "step": 5 }, { "epoch": 0.06359300476947535, "grad_norm": 2.5928449630737305, "learning_rate": 4.994440868783522e-05, "loss": 2.4825, "step": 10 }, { "epoch": 0.09538950715421304, "grad_norm": 1.5238102674484253, "learning_rate": 4.987497750755702e-05, "loss": 2.2139, "step": 15 }, { "epoch": 0.1271860095389507, "grad_norm": 1.1545639038085938, "learning_rate": 4.977788198285995e-05, "loss": 2.0351, "step": 20 }, { "epoch": 0.1589825119236884, "grad_norm": 0.9941776990890503, "learning_rate": 4.965323009711954e-05, "loss": 2.1238, "step": 25 }, { "epoch": 0.1907790143084261, "grad_norm": 1.0189181566238403, "learning_rate": 4.950116048011739e-05, "loss": 1.9683, "step": 30 }, { "epoch": 0.22257551669316375, "grad_norm": 0.8917892575263977, "learning_rate": 4.9321842253866136e-05, "loss": 1.912, "step": 35 }, { "epoch": 0.2543720190779014, "grad_norm": 0.8906163573265076, "learning_rate": 4.911547484452286e-05, "loss": 1.916, "step": 40 }, { "epoch": 0.2861685214626391, "grad_norm": 0.9791882038116455, "learning_rate": 4.888228776060016e-05, "loss": 1.8891, "step": 45 }, { "epoch": 0.3179650238473768, "grad_norm": 0.9773047566413879, "learning_rate": 4.862254033772164e-05, "loss": 1.9039, "step": 50 }, { "epoch": 0.3497615262321145, "grad_norm": 1.0379596948623657, "learning_rate": 4.833652145020551e-05, "loss": 1.8915, "step": 55 }, { "epoch": 0.3815580286168522, "grad_norm": 0.9040148854255676, "learning_rate": 4.8024549189797276e-05, "loss": 1.887, "step": 60 }, { "epoch": 0.4133545310015898, "grad_norm": 1.0032649040222168, "learning_rate": 4.7686970511908594e-05, "loss": 1.9291, "step": 65 }, { "epoch": 0.4451510333863275, "grad_norm": 1.0851433277130127, "learning_rate": 4.732416084975585e-05, "loss": 1.8363, "step": 70 }, { "epoch": 0.4769475357710652, "grad_norm": 0.9705031514167786, "learning_rate": 4.6936523696827615e-05, "loss": 1.8624, "step": 75 }, { "epoch": 0.5087440381558028, "grad_norm": 0.991225004196167, "learning_rate": 4.652449015814518e-05, "loss": 1.8554, "step": 80 }, { "epoch": 0.5405405405405406, "grad_norm": 1.061652660369873, "learning_rate": 4.608851847081542e-05, "loss": 1.8476, "step": 85 }, { "epoch": 0.5723370429252782, "grad_norm": 1.126745581626892, "learning_rate": 4.562909349440899e-05, "loss": 1.7998, "step": 90 }, { "epoch": 0.604133545310016, "grad_norm": 1.155612826347351, "learning_rate": 4.514672617173091e-05, "loss": 1.9258, "step": 95 }, { "epoch": 0.6359300476947536, "grad_norm": 1.0810774564743042, "learning_rate": 4.4641952960582877e-05, "loss": 1.7941, "step": 100 }, { "epoch": 0.6677265500794912, "grad_norm": 1.1277074813842773, "learning_rate": 4.411533523714954e-05, "loss": 1.8889, "step": 105 }, { "epoch": 0.699523052464229, "grad_norm": 1.1673099994659424, "learning_rate": 4.3567458671672154e-05, "loss": 1.8287, "step": 110 }, { "epoch": 0.7313195548489666, "grad_norm": 1.141694188117981, "learning_rate": 4.299893257710394e-05, "loss": 1.7811, "step": 115 }, { "epoch": 0.7631160572337043, "grad_norm": 1.0848796367645264, "learning_rate": 4.241038923147154e-05, "loss": 1.7925, "step": 120 }, { "epoch": 0.794912559618442, "grad_norm": 1.1177420616149902, "learning_rate": 4.1802483174696214e-05, "loss": 1.8646, "step": 125 }, { "epoch": 0.8267090620031796, "grad_norm": 1.2627627849578857, "learning_rate": 4.117589048065677e-05, "loss": 1.8213, "step": 130 }, { "epoch": 0.8585055643879174, "grad_norm": 1.107101321220398, "learning_rate": 4.053130800530386e-05, "loss": 1.8301, "step": 135 }, { "epoch": 0.890302066772655, "grad_norm": 1.219338059425354, "learning_rate": 3.986945261166174e-05, "loss": 1.8081, "step": 140 }, { "epoch": 0.9220985691573926, "grad_norm": 1.2252509593963623, "learning_rate": 3.91910603725795e-05, "loss": 1.8472, "step": 145 }, { "epoch": 0.9538950715421304, "grad_norm": 1.1074373722076416, "learning_rate": 3.849688575211836e-05, "loss": 1.8186, "step": 150 }, { "epoch": 0.985691573926868, "grad_norm": 1.1255358457565308, "learning_rate": 3.778770076648543e-05, "loss": 1.8542, "step": 155 }, { "epoch": 1.0174880763116056, "grad_norm": 1.2812623977661133, "learning_rate": 3.706429412544711e-05, "loss": 1.6997, "step": 160 }, { "epoch": 1.0492845786963434, "grad_norm": 1.1419382095336914, "learning_rate": 3.632747035517701e-05, "loss": 1.8274, "step": 165 }, { "epoch": 1.0810810810810811, "grad_norm": 1.2307173013687134, "learning_rate": 3.557804890351383e-05, "loss": 1.766, "step": 170 }, { "epoch": 1.1128775834658187, "grad_norm": 1.2607144117355347, "learning_rate": 3.481686322862443e-05, "loss": 1.7501, "step": 175 }, { "epoch": 1.1446740858505564, "grad_norm": 1.174317717552185, "learning_rate": 3.404475987208539e-05, "loss": 1.7754, "step": 180 }, { "epoch": 1.1764705882352942, "grad_norm": 1.224722146987915, "learning_rate": 3.326259751741414e-05, "loss": 1.8031, "step": 185 }, { "epoch": 1.2082670906200317, "grad_norm": 1.4189472198486328, "learning_rate": 3.247124603509659e-05, "loss": 1.7226, "step": 190 }, { "epoch": 1.2400635930047694, "grad_norm": 1.2927441596984863, "learning_rate": 3.167158551517326e-05, "loss": 1.7752, "step": 195 }, { "epoch": 1.2718600953895072, "grad_norm": 1.361237645149231, "learning_rate": 3.0864505288460034e-05, "loss": 1.7353, "step": 200 } ], "logging_steps": 5, "max_steps": 471, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.963049291671142e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }