{ "best_metric": 0.5355889797210693, "best_model_checkpoint": "saves/Mistral-7B/lora/train_1/checkpoint-10", "epoch": 0.06562756357670221, "eval_steps": 10, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010937927262783703, "grad_norm": 0.9827508330345154, "learning_rate": 0.00029999015487222375, "loss": 0.8548, "step": 10 }, { "epoch": 0.010937927262783703, "eval_loss": 0.5355889797210693, "eval_runtime": 2175.855, "eval_samples_per_second": 21.656, "eval_steps_per_second": 0.677, "step": 10 }, { "epoch": 0.021875854525567406, "grad_norm": 0.6526839733123779, "learning_rate": 0.00029996062078124905, "loss": 0.4005, "step": 20 }, { "epoch": 0.021875854525567406, "eval_loss": 0.579488217830658, "eval_runtime": 2175.2344, "eval_samples_per_second": 21.662, "eval_steps_per_second": 0.677, "step": 20 }, { "epoch": 0.03281378178835111, "grad_norm": 0.5257601141929626, "learning_rate": 0.0002999114016039678, "loss": 0.3521, "step": 30 }, { "epoch": 0.03281378178835111, "eval_loss": 0.5633693337440491, "eval_runtime": 2176.4963, "eval_samples_per_second": 21.649, "eval_steps_per_second": 0.677, "step": 30 }, { "epoch": 0.04375170905113481, "grad_norm": 0.553675651550293, "learning_rate": 0.00029984250380130117, "loss": 0.3565, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.5676367878913879, "eval_runtime": 2173.2369, "eval_samples_per_second": 21.682, "eval_steps_per_second": 0.678, "step": 40 }, { "epoch": 0.05468963631391851, "grad_norm": 0.3802327811717987, "learning_rate": 0.0002997539364173515, "loss": 0.3418, "step": 50 }, { "epoch": 0.05468963631391851, "eval_loss": 0.5385285019874573, "eval_runtime": 2174.1731, "eval_samples_per_second": 21.673, "eval_steps_per_second": 0.677, "step": 50 }, { "epoch": 0.06562756357670221, "grad_norm": 0.5235214829444885, "learning_rate": 0.00029964571107821494, "loss": 0.3298, "step": 60 }, { "epoch": 0.06562756357670221, "eval_loss": 0.5531513094902039, "eval_runtime": 2176.9278, "eval_samples_per_second": 21.645, "eval_steps_per_second": 0.677, "step": 60 }, { "epoch": 0.06562756357670221, "step": 60, "total_flos": 1.143902298880082e+17, "train_loss": 0.43926427761713666, "train_runtime": 15865.2354, "train_samples_per_second": 44.255, "train_steps_per_second": 0.173 } ], "logging_steps": 10, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.143902298880082e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }