{ "best_metric": 0.8349647521972656, "best_model_checkpoint": "saves/LLaMA3-8B/lora/train_1/checkpoint-240", "epoch": 0.4812687995624829, "eval_steps": 40, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04375170905113481, "grad_norm": 0.8457286357879639, "learning_rate": 0.00014457831325301204, "loss": 1.211, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.8552733659744263, "eval_runtime": 212.5084, "eval_samples_per_second": 22.173, "eval_steps_per_second": 0.696, "step": 40 }, { "epoch": 0.08750341810226962, "grad_norm": 0.5710806846618652, "learning_rate": 0.0002891566265060241, "loss": 0.4676, "step": 80 }, { "epoch": 0.08750341810226962, "eval_loss": 0.8505380153656006, "eval_runtime": 212.6707, "eval_samples_per_second": 22.156, "eval_steps_per_second": 0.696, "step": 80 }, { "epoch": 0.13125512715340443, "grad_norm": 0.5647861361503601, "learning_rate": 0.00029985669589905196, "loss": 0.4316, "step": 120 }, { "epoch": 0.13125512715340443, "eval_loss": 0.8580976128578186, "eval_runtime": 212.4255, "eval_samples_per_second": 22.182, "eval_steps_per_second": 0.697, "step": 120 }, { "epoch": 0.17500683620453925, "grad_norm": 0.36067792773246765, "learning_rate": 0.0002993796936474492, "loss": 0.4109, "step": 160 }, { "epoch": 0.17500683620453925, "eval_loss": 0.8414877653121948, "eval_runtime": 212.0889, "eval_samples_per_second": 22.217, "eval_steps_per_second": 0.698, "step": 160 }, { "epoch": 0.21875854525567404, "grad_norm": 0.30857041478157043, "learning_rate": 0.00029856911617379416, "loss": 0.4033, "step": 200 }, { "epoch": 0.21875854525567404, "eval_loss": 0.8380470275878906, "eval_runtime": 212.6628, "eval_samples_per_second": 22.157, "eval_steps_per_second": 0.696, "step": 200 }, { "epoch": 0.26251025430680885, "grad_norm": 0.37047892808914185, "learning_rate": 0.00029742677355383513, "loss": 0.3781, "step": 240 }, { "epoch": 0.26251025430680885, "eval_loss": 0.8349647521972656, "eval_runtime": 212.5015, "eval_samples_per_second": 22.174, "eval_steps_per_second": 0.696, "step": 240 }, { "epoch": 0.30626196335794365, "grad_norm": 0.4294930100440979, "learning_rate": 0.0002959552167179149, "loss": 0.3722, "step": 280 }, { "epoch": 0.30626196335794365, "eval_loss": 0.8508928418159485, "eval_runtime": 212.9223, "eval_samples_per_second": 22.13, "eval_steps_per_second": 0.695, "step": 280 }, { "epoch": 0.3500136724090785, "grad_norm": 0.39482536911964417, "learning_rate": 0.00029415773175456614, "loss": 0.3594, "step": 320 }, { "epoch": 0.3500136724090785, "eval_loss": 0.9025893211364746, "eval_runtime": 212.0864, "eval_samples_per_second": 22.217, "eval_steps_per_second": 0.698, "step": 320 }, { "epoch": 0.3937653814602133, "grad_norm": 0.8047598600387573, "learning_rate": 0.0002920383325724476, "loss": 0.357, "step": 360 }, { "epoch": 0.3937653814602133, "eval_loss": 0.9121100902557373, "eval_runtime": 212.3738, "eval_samples_per_second": 22.187, "eval_steps_per_second": 0.697, "step": 360 }, { "epoch": 0.4375170905113481, "grad_norm": 0.4813142418861389, "learning_rate": 0.0002896017519370078, "loss": 0.3297, "step": 400 }, { "epoch": 0.4375170905113481, "eval_loss": 0.8974694013595581, "eval_runtime": 212.4785, "eval_samples_per_second": 22.176, "eval_steps_per_second": 0.697, "step": 400 }, { "epoch": 0.4812687995624829, "grad_norm": 0.62039715051651, "learning_rate": 0.000286853430901891, "loss": 0.3243, "step": 440 }, { "epoch": 0.4812687995624829, "eval_loss": 0.9863638877868652, "eval_runtime": 212.4156, "eval_samples_per_second": 22.183, "eval_steps_per_second": 0.697, "step": 440 }, { "epoch": 0.4812687995624829, "step": 440, "total_flos": 7.823309976360714e+17, "train_loss": 0.4586519674821333, "train_runtime": 21284.6342, "train_samples_per_second": 32.987, "train_steps_per_second": 0.129 } ], "logging_steps": 40, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 40, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.823309976360714e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }