{ "best_metric": 0.07866430282592773, "best_model_checkpoint": "saves/Mistral-7B/lora/train_1/checkpoint-50", "epoch": 2.8828828828828827, "eval_steps": 10, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36036036036036034, "grad_norm": 0.26596060395240784, "learning_rate": 0.00028885859539033357, "loss": 0.4784, "step": 10 }, { "epoch": 0.36036036036036034, "eval_loss": 0.10740000009536743, "eval_runtime": 92.81, "eval_samples_per_second": 15.774, "eval_steps_per_second": 1.972, "step": 10 }, { "epoch": 0.7207207207207207, "grad_norm": 0.3412328362464905, "learning_rate": 0.00025708946018368484, "loss": 0.0943, "step": 20 }, { "epoch": 0.7207207207207207, "eval_loss": 0.08951539546251297, "eval_runtime": 92.8999, "eval_samples_per_second": 15.759, "eval_steps_per_second": 1.97, "step": 20 }, { "epoch": 1.0810810810810811, "grad_norm": 0.26343515515327454, "learning_rate": 0.0002094119649058735, "loss": 0.0763, "step": 30 }, { "epoch": 1.0810810810810811, "eval_loss": 0.08366864919662476, "eval_runtime": 92.931, "eval_samples_per_second": 15.754, "eval_steps_per_second": 1.969, "step": 30 }, { "epoch": 1.4414414414414414, "grad_norm": 0.20433703064918518, "learning_rate": 0.00015290869976577364, "loss": 0.0674, "step": 40 }, { "epoch": 1.4414414414414414, "eval_loss": 0.08040930330753326, "eval_runtime": 92.8073, "eval_samples_per_second": 15.775, "eval_steps_per_second": 1.972, "step": 40 }, { "epoch": 1.8018018018018018, "grad_norm": 0.23807112872600555, "learning_rate": 9.597334127929346e-05, "loss": 0.0622, "step": 50 }, { "epoch": 1.8018018018018018, "eval_loss": 0.07866430282592773, "eval_runtime": 92.8443, "eval_samples_per_second": 15.768, "eval_steps_per_second": 1.971, "step": 50 }, { "epoch": 2.1621621621621623, "grad_norm": 0.21819865703582764, "learning_rate": 4.706375431968997e-05, "loss": 0.0596, "step": 60 }, { "epoch": 2.1621621621621623, "eval_loss": 0.08040966093540192, "eval_runtime": 92.8758, "eval_samples_per_second": 15.763, "eval_steps_per_second": 1.97, "step": 60 }, { "epoch": 2.5225225225225225, "grad_norm": 0.23914609849452972, "learning_rate": 1.3445558855078014e-05, "loss": 0.0493, "step": 70 }, { "epoch": 2.5225225225225225, "eval_loss": 0.08447360247373581, "eval_runtime": 92.8784, "eval_samples_per_second": 15.763, "eval_steps_per_second": 1.97, "step": 70 }, { "epoch": 2.8828828828828827, "grad_norm": 0.24320539832115173, "learning_rate": 1.1280712436549378e-07, "loss": 0.0523, "step": 80 }, { "epoch": 2.8828828828828827, "eval_loss": 0.08488883823156357, "eval_runtime": 92.8598, "eval_samples_per_second": 15.766, "eval_steps_per_second": 1.971, "step": 80 }, { "epoch": 2.8828828828828827, "step": 80, "total_flos": 1.2688077065394586e+17, "train_loss": 0.117483252286911, "train_runtime": 3922.4935, "train_samples_per_second": 5.432, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2688077065394586e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }