{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5952380952380952, "eval_steps": 25, "global_step": 475, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03132832080200501, "grad_norm": 3.161522626876831, "learning_rate": 2.3797595190380762e-05, "loss": 2.5111, "step": 25 }, { "epoch": 0.03132832080200501, "eval_loss": 2.4335546493530273, "eval_runtime": 722.1607, "eval_samples_per_second": 2.209, "eval_steps_per_second": 0.277, "step": 25 }, { "epoch": 0.06265664160401002, "grad_norm": 6.950015068054199, "learning_rate": 2.2545090180360722e-05, "loss": 2.3676, "step": 50 }, { "epoch": 0.06265664160401002, "eval_loss": 2.241159439086914, "eval_runtime": 724.4073, "eval_samples_per_second": 2.202, "eval_steps_per_second": 0.276, "step": 50 }, { "epoch": 0.09398496240601503, "grad_norm": 8.31242847442627, "learning_rate": 2.1292585170340683e-05, "loss": 2.2132, "step": 75 }, { "epoch": 0.09398496240601503, "eval_loss": 2.1175358295440674, "eval_runtime": 720.5313, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.278, "step": 75 }, { "epoch": 0.12531328320802004, "grad_norm": 11.33967399597168, "learning_rate": 2.0040080160320643e-05, "loss": 2.1464, "step": 100 }, { "epoch": 0.12531328320802004, "eval_loss": 2.0354082584381104, "eval_runtime": 721.3533, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.277, "step": 100 }, { "epoch": 0.15664160401002505, "grad_norm": 6.970797061920166, "learning_rate": 1.87875751503006e-05, "loss": 2.0109, "step": 125 }, { "epoch": 0.15664160401002505, "eval_loss": 1.9767951965332031, "eval_runtime": 720.7941, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.277, "step": 125 }, { "epoch": 0.18796992481203006, "grad_norm": 15.953432083129883, "learning_rate": 1.7535070140280564e-05, "loss": 2.0434, "step": 150 }, { "epoch": 0.18796992481203006, "eval_loss": 1.9400724172592163, "eval_runtime": 725.3731, "eval_samples_per_second": 2.199, "eval_steps_per_second": 0.276, "step": 150 }, { "epoch": 0.21929824561403508, "grad_norm": 9.164532661437988, "learning_rate": 1.628256513026052e-05, "loss": 1.9165, "step": 175 }, { "epoch": 0.21929824561403508, "eval_loss": 1.916611671447754, "eval_runtime": 721.2615, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.277, "step": 175 }, { "epoch": 0.2506265664160401, "grad_norm": 11.897910118103027, "learning_rate": 1.5030060120240483e-05, "loss": 2.0037, "step": 200 }, { "epoch": 0.2506265664160401, "eval_loss": 1.8966965675354004, "eval_runtime": 721.0344, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.277, "step": 200 }, { "epoch": 0.2819548872180451, "grad_norm": 16.23155403137207, "learning_rate": 1.3777555110220442e-05, "loss": 1.9693, "step": 225 }, { "epoch": 0.2819548872180451, "eval_loss": 1.8825273513793945, "eval_runtime": 720.7907, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.277, "step": 225 }, { "epoch": 0.3132832080200501, "grad_norm": 11.5318021774292, "learning_rate": 1.25250501002004e-05, "loss": 1.8635, "step": 250 }, { "epoch": 0.3132832080200501, "eval_loss": 1.8641014099121094, "eval_runtime": 720.4196, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.278, "step": 250 }, { "epoch": 0.34461152882205515, "grad_norm": 16.45474624633789, "learning_rate": 1.1272545090180361e-05, "loss": 1.8685, "step": 275 }, { "epoch": 0.34461152882205515, "eval_loss": 1.852317452430725, "eval_runtime": 720.5223, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.278, "step": 275 }, { "epoch": 0.37593984962406013, "grad_norm": 17.96300506591797, "learning_rate": 1.0020040080160322e-05, "loss": 1.7522, "step": 300 }, { "epoch": 0.37593984962406013, "eval_loss": 1.8399490118026733, "eval_runtime": 720.4464, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.278, "step": 300 }, { "epoch": 0.40726817042606517, "grad_norm": 13.310808181762695, "learning_rate": 8.767535070140282e-06, "loss": 1.8459, "step": 325 }, { "epoch": 0.40726817042606517, "eval_loss": 1.8312922716140747, "eval_runtime": 720.395, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.278, "step": 325 }, { "epoch": 0.43859649122807015, "grad_norm": 13.073380470275879, "learning_rate": 7.515030060120242e-06, "loss": 1.8879, "step": 350 }, { "epoch": 0.43859649122807015, "eval_loss": 1.8224186897277832, "eval_runtime": 720.0905, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.278, "step": 350 }, { "epoch": 0.4699248120300752, "grad_norm": 15.554895401000977, "learning_rate": 6.2625250501002e-06, "loss": 1.9055, "step": 375 }, { "epoch": 0.4699248120300752, "eval_loss": 1.8179148435592651, "eval_runtime": 721.2094, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.277, "step": 375 }, { "epoch": 0.5012531328320802, "grad_norm": 15.814194679260254, "learning_rate": 5.010020040080161e-06, "loss": 1.8801, "step": 400 }, { "epoch": 0.5012531328320802, "eval_loss": 1.8140182495117188, "eval_runtime": 721.3168, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.277, "step": 400 }, { "epoch": 0.5325814536340853, "grad_norm": 9.506321907043457, "learning_rate": 3.757515030060121e-06, "loss": 1.8526, "step": 425 }, { "epoch": 0.5325814536340853, "eval_loss": 1.8105257749557495, "eval_runtime": 720.746, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.277, "step": 425 }, { "epoch": 0.5639097744360902, "grad_norm": 16.16132926940918, "learning_rate": 2.5050100200400804e-06, "loss": 1.89, "step": 450 }, { "epoch": 0.5639097744360902, "eval_loss": 1.8077690601348877, "eval_runtime": 721.7445, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.277, "step": 450 }, { "epoch": 0.5952380952380952, "grad_norm": 15.308736801147461, "learning_rate": 1.2525050100200402e-06, "loss": 1.8555, "step": 475 }, { "epoch": 0.5952380952380952, "eval_loss": 1.8060969114303589, "eval_runtime": 722.1684, "eval_samples_per_second": 2.209, "eval_steps_per_second": 0.277, "step": 475 } ], "logging_steps": 25, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1489311152517120.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }