{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4043807919123842, "eval_steps": 15, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02527379949452401, "grad_norm": 14.990800857543945, "learning_rate": 9.999802884287873e-06, "loss": 2.5024, "step": 15 }, { "epoch": 0.02527379949452401, "eval_loss": 2.251873016357422, "eval_runtime": 2381.2021, "eval_samples_per_second": 0.519, "eval_steps_per_second": 0.13, "step": 15 }, { "epoch": 0.05054759898904802, "grad_norm": 11.093191146850586, "learning_rate": 9.996846459432971e-06, "loss": 2.0154, "step": 30 }, { "epoch": 0.05054759898904802, "eval_loss": 1.9771775007247925, "eval_runtime": 2379.4231, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 30 }, { "epoch": 0.07582139848357203, "grad_norm": 10.310027122497559, "learning_rate": 9.990344375946395e-06, "loss": 1.8536, "step": 45 }, { "epoch": 0.07582139848357203, "eval_loss": 1.8929402828216553, "eval_runtime": 2379.7201, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 45 }, { "epoch": 0.10109519797809605, "grad_norm": 14.144546508789062, "learning_rate": 9.980301247571758e-06, "loss": 1.8062, "step": 60 }, { "epoch": 0.10109519797809605, "eval_loss": 1.8586353063583374, "eval_runtime": 2379.9138, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 60 }, { "epoch": 0.12636899747262004, "grad_norm": 12.928871154785156, "learning_rate": 9.966724200704695e-06, "loss": 1.8739, "step": 75 }, { "epoch": 0.12636899747262004, "eval_loss": 1.8373581171035767, "eval_runtime": 2379.1727, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 75 }, { "epoch": 0.15164279696714406, "grad_norm": 11.413592338562012, "learning_rate": 9.94962286933613e-06, "loss": 1.8687, "step": 90 }, { "epoch": 0.15164279696714406, "eval_loss": 1.8189234733581543, "eval_runtime": 2379.5736, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 90 }, { "epoch": 0.17691659646166807, "grad_norm": 10.060262680053711, "learning_rate": 9.929009388216183e-06, "loss": 1.749, "step": 105 }, { "epoch": 0.17691659646166807, "eval_loss": 1.8106799125671387, "eval_runtime": 2379.2612, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 105 }, { "epoch": 0.2021903959561921, "grad_norm": 11.168642044067383, "learning_rate": 9.904898384243608e-06, "loss": 1.826, "step": 120 }, { "epoch": 0.2021903959561921, "eval_loss": 1.8040649890899658, "eval_runtime": 2380.0282, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 120 }, { "epoch": 0.22746419545071608, "grad_norm": 9.17194938659668, "learning_rate": 9.877306966086854e-06, "loss": 1.7828, "step": 135 }, { "epoch": 0.22746419545071608, "eval_loss": 1.7994695901870728, "eval_runtime": 2379.6821, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 135 }, { "epoch": 0.2527379949452401, "grad_norm": 9.84255599975586, "learning_rate": 9.846254712044102e-06, "loss": 1.7225, "step": 150 }, { "epoch": 0.2527379949452401, "eval_loss": 1.7961242198944092, "eval_runtime": 2379.9202, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 150 }, { "epoch": 0.2780117944397641, "grad_norm": 8.864015579223633, "learning_rate": 9.811763656150912e-06, "loss": 1.8227, "step": 165 }, { "epoch": 0.2780117944397641, "eval_loss": 1.7934980392456055, "eval_runtime": 2379.8003, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 165 }, { "epoch": 0.3032855939342881, "grad_norm": 10.471166610717773, "learning_rate": 9.773858272545329e-06, "loss": 1.7436, "step": 180 }, { "epoch": 0.3032855939342881, "eval_loss": 1.791121006011963, "eval_runtime": 2379.8892, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 180 }, { "epoch": 0.32855939342881213, "grad_norm": 12.403428077697754, "learning_rate": 9.732565458101545e-06, "loss": 1.843, "step": 195 }, { "epoch": 0.32855939342881213, "eval_loss": 1.7891260385513306, "eval_runtime": 2379.2135, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 195 }, { "epoch": 0.35383319292333615, "grad_norm": 10.464547157287598, "learning_rate": 9.687914513344432e-06, "loss": 1.7454, "step": 210 }, { "epoch": 0.35383319292333615, "eval_loss": 1.78617262840271, "eval_runtime": 2378.8348, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 210 }, { "epoch": 0.37910699241786017, "grad_norm": 8.834742546081543, "learning_rate": 9.639937121658492e-06, "loss": 1.7015, "step": 225 }, { "epoch": 0.37910699241786017, "eval_loss": 1.7838687896728516, "eval_runtime": 2379.7979, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 225 }, { "epoch": 0.4043807919123842, "grad_norm": 8.59927749633789, "learning_rate": 9.588667326805996e-06, "loss": 1.7009, "step": 240 }, { "epoch": 0.4043807919123842, "eval_loss": 1.7831588983535767, "eval_runtime": 2379.7143, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.13, "step": 240 } ], "logging_steps": 15, "max_steps": 1779, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 15, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.4792379146940416e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }