{ "best_metric": 0.5069935321807861, "best_model_checkpoint": "saves/Llama2-7B/lora/train_1/checkpoint-110", "epoch": 0.22969647251845776, "eval_steps": 10, "global_step": 210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010937927262783703, "grad_norm": 0.5937469005584717, "learning_rate": 0.00029999015487222375, "loss": 1.2211, "step": 10 }, { "epoch": 0.010937927262783703, "eval_loss": 0.550807535648346, "eval_runtime": 234.7461, "eval_samples_per_second": 20.073, "eval_steps_per_second": 0.63, "step": 10 }, { "epoch": 0.021875854525567406, "grad_norm": 0.4648227393627167, "learning_rate": 0.00029996062078124905, "loss": 0.446, "step": 20 }, { "epoch": 0.021875854525567406, "eval_loss": 0.5179979205131531, "eval_runtime": 234.6313, "eval_samples_per_second": 20.083, "eval_steps_per_second": 0.631, "step": 20 }, { "epoch": 0.03281378178835111, "grad_norm": 0.42532142996788025, "learning_rate": 0.0002999114016039678, "loss": 0.3889, "step": 30 }, { "epoch": 0.03281378178835111, "eval_loss": 0.5139310956001282, "eval_runtime": 234.5844, "eval_samples_per_second": 20.087, "eval_steps_per_second": 0.631, "step": 30 }, { "epoch": 0.04375170905113481, "grad_norm": 0.47056564688682556, "learning_rate": 0.00029984250380130117, "loss": 0.3757, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.575846791267395, "eval_runtime": 234.7308, "eval_samples_per_second": 20.074, "eval_steps_per_second": 0.631, "step": 40 }, { "epoch": 0.05468963631391851, "grad_norm": 0.2359870821237564, "learning_rate": 0.0002997539364173515, "loss": 0.3551, "step": 50 }, { "epoch": 0.05468963631391851, "eval_loss": 0.5461050271987915, "eval_runtime": 234.7378, "eval_samples_per_second": 20.073, "eval_steps_per_second": 0.63, "step": 50 }, { "epoch": 0.06562756357670221, "grad_norm": 0.19186203181743622, "learning_rate": 0.00029964571107821494, "loss": 0.3351, "step": 60 }, { "epoch": 0.06562756357670221, "eval_loss": 0.5407155752182007, "eval_runtime": 234.7344, "eval_samples_per_second": 20.074, "eval_steps_per_second": 0.63, "step": 60 }, { "epoch": 0.07656549083948591, "grad_norm": 0.25036656856536865, "learning_rate": 0.00029951784199045534, "loss": 0.3269, "step": 70 }, { "epoch": 0.07656549083948591, "eval_loss": 0.5335067510604858, "eval_runtime": 234.559, "eval_samples_per_second": 20.089, "eval_steps_per_second": 0.631, "step": 70 }, { "epoch": 0.08750341810226962, "grad_norm": 0.3150342106819153, "learning_rate": 0.0002993703459392396, "loss": 0.3231, "step": 80 }, { "epoch": 0.08750341810226962, "eval_loss": 0.5120783448219299, "eval_runtime": 234.641, "eval_samples_per_second": 20.082, "eval_steps_per_second": 0.631, "step": 80 }, { "epoch": 0.09844134536505332, "grad_norm": 0.3419171869754791, "learning_rate": 0.00029920324228613376, "loss": 0.3324, "step": 90 }, { "epoch": 0.09844134536505332, "eval_loss": 0.5196456909179688, "eval_runtime": 234.6621, "eval_samples_per_second": 20.08, "eval_steps_per_second": 0.631, "step": 90 }, { "epoch": 0.10937927262783702, "grad_norm": 0.2197423279285431, "learning_rate": 0.0002990165529665622, "loss": 0.3039, "step": 100 }, { "epoch": 0.10937927262783702, "eval_loss": 0.5251961350440979, "eval_runtime": 234.8661, "eval_samples_per_second": 20.062, "eval_steps_per_second": 0.63, "step": 100 }, { "epoch": 0.12031719989062073, "grad_norm": 0.16334187984466553, "learning_rate": 0.0002988103024869277, "loss": 0.3083, "step": 110 }, { "epoch": 0.12031719989062073, "eval_loss": 0.5069935321807861, "eval_runtime": 234.5526, "eval_samples_per_second": 20.089, "eval_steps_per_second": 0.631, "step": 110 }, { "epoch": 0.13125512715340443, "grad_norm": 0.2504599392414093, "learning_rate": 0.00029858451792139453, "loss": 0.3034, "step": 120 }, { "epoch": 0.13125512715340443, "eval_loss": 0.5375232100486755, "eval_runtime": 234.9066, "eval_samples_per_second": 20.059, "eval_steps_per_second": 0.63, "step": 120 }, { "epoch": 0.14219305441618812, "grad_norm": 0.20455080270767212, "learning_rate": 0.0002983392289083346, "loss": 0.3061, "step": 130 }, { "epoch": 0.14219305441618812, "eval_loss": 0.5345537066459656, "eval_runtime": 234.955, "eval_samples_per_second": 20.055, "eval_steps_per_second": 0.63, "step": 130 }, { "epoch": 0.15313098167897182, "grad_norm": 0.20371787250041962, "learning_rate": 0.0002980744676464371, "loss": 0.3021, "step": 140 }, { "epoch": 0.15313098167897182, "eval_loss": 0.5111611485481262, "eval_runtime": 234.7152, "eval_samples_per_second": 20.075, "eval_steps_per_second": 0.631, "step": 140 }, { "epoch": 0.16406890894175555, "grad_norm": 0.14063598215579987, "learning_rate": 0.0002977902688904813, "loss": 0.3067, "step": 150 }, { "epoch": 0.16406890894175555, "eval_loss": 0.5191352367401123, "eval_runtime": 234.5955, "eval_samples_per_second": 20.086, "eval_steps_per_second": 0.631, "step": 150 }, { "epoch": 0.17500683620453925, "grad_norm": 0.20113405585289001, "learning_rate": 0.00029748666994677467, "loss": 0.2958, "step": 160 }, { "epoch": 0.17500683620453925, "eval_loss": 0.5278245210647583, "eval_runtime": 234.5747, "eval_samples_per_second": 20.087, "eval_steps_per_second": 0.631, "step": 160 }, { "epoch": 0.18594476346732294, "grad_norm": 0.20178332924842834, "learning_rate": 0.00029716371066825593, "loss": 0.3002, "step": 170 }, { "epoch": 0.18594476346732294, "eval_loss": 0.5170465111732483, "eval_runtime": 234.6689, "eval_samples_per_second": 20.079, "eval_steps_per_second": 0.631, "step": 170 }, { "epoch": 0.19688269073010664, "grad_norm": 0.19617249071598053, "learning_rate": 0.0002968214334492632, "loss": 0.2896, "step": 180 }, { "epoch": 0.19688269073010664, "eval_loss": 0.5085877180099487, "eval_runtime": 234.6318, "eval_samples_per_second": 20.083, "eval_steps_per_second": 0.631, "step": 180 }, { "epoch": 0.20782061799289034, "grad_norm": 0.2610602080821991, "learning_rate": 0.00029645988321996917, "loss": 0.2989, "step": 190 }, { "epoch": 0.20782061799289034, "eval_loss": 0.530036985874176, "eval_runtime": 234.599, "eval_samples_per_second": 20.085, "eval_steps_per_second": 0.631, "step": 190 }, { "epoch": 0.21875854525567404, "grad_norm": 0.1975240856409073, "learning_rate": 0.00029607910744048336, "loss": 0.3012, "step": 200 }, { "epoch": 0.21875854525567404, "eval_loss": 0.5137269496917725, "eval_runtime": 234.4763, "eval_samples_per_second": 20.096, "eval_steps_per_second": 0.631, "step": 200 }, { "epoch": 0.22969647251845776, "grad_norm": 0.19913341104984283, "learning_rate": 0.00029567915609462174, "loss": 0.2802, "step": 210 }, { "epoch": 0.22969647251845776, "eval_loss": 0.5159074068069458, "eval_runtime": 234.549, "eval_samples_per_second": 20.09, "eval_steps_per_second": 0.631, "step": 210 }, { "epoch": 0.22969647251845776, "step": 210, "total_flos": 3.9689172647569e+17, "train_loss": 0.36669377031780426, "train_runtime": 15000.4627, "train_samples_per_second": 46.807, "train_steps_per_second": 0.183 } ], "logging_steps": 10, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.9689172647569e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }