{ "best_metric": 0.5337400436401367, "best_model_checkpoint": "saves/Mistral-7B/lora/train_1/checkpoint-10", "epoch": 0.06562756357670221, "eval_steps": 10, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010937927262783703, "grad_norm": 0.9827508330345154, "learning_rate": 0.00029999015487222375, "loss": 0.8548, "step": 10 }, { "epoch": 0.010937927262783703, "eval_loss": 0.5337400436401367, "eval_runtime": 231.0066, "eval_samples_per_second": 20.398, "eval_steps_per_second": 0.641, "step": 10 }, { "epoch": 0.021875854525567406, "grad_norm": 0.6526839733123779, "learning_rate": 0.00029996062078124905, "loss": 0.4005, "step": 20 }, { "epoch": 0.021875854525567406, "eval_loss": 0.5784199833869934, "eval_runtime": 231.0005, "eval_samples_per_second": 20.398, "eval_steps_per_second": 0.641, "step": 20 }, { "epoch": 0.03281378178835111, "grad_norm": 0.5257601141929626, "learning_rate": 0.0002999114016039678, "loss": 0.3521, "step": 30 }, { "epoch": 0.03281378178835111, "eval_loss": 0.5616388320922852, "eval_runtime": 230.9639, "eval_samples_per_second": 20.401, "eval_steps_per_second": 0.641, "step": 30 }, { "epoch": 0.04375170905113481, "grad_norm": 0.553675651550293, "learning_rate": 0.00029984250380130117, "loss": 0.3565, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.5677462220191956, "eval_runtime": 231.0825, "eval_samples_per_second": 20.391, "eval_steps_per_second": 0.64, "step": 40 }, { "epoch": 0.05468963631391851, "grad_norm": 0.3802327811717987, "learning_rate": 0.0002997539364173515, "loss": 0.3418, "step": 50 }, { "epoch": 0.05468963631391851, "eval_loss": 0.5386738181114197, "eval_runtime": 231.0958, "eval_samples_per_second": 20.39, "eval_steps_per_second": 0.64, "step": 50 }, { "epoch": 0.06562756357670221, "grad_norm": 0.5235214829444885, "learning_rate": 0.00029964571107821494, "loss": 0.3298, "step": 60 }, { "epoch": 0.06562756357670221, "eval_loss": 0.5525479912757874, "eval_runtime": 230.9516, "eval_samples_per_second": 20.403, "eval_steps_per_second": 0.641, "step": 60 }, { "epoch": 0.06562756357670221, "step": 60, "total_flos": 1.143902298880082e+17, "train_loss": 0.43926427761713666, "train_runtime": 4213.5123, "train_samples_per_second": 166.635, "train_steps_per_second": 0.651 } ], "logging_steps": 10, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.143902298880082e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }