{ "best_metric": 0.83160400390625, "best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-160", "epoch": 0.3937653814602133, "eval_steps": 40, "global_step": 360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04375170905113481, "grad_norm": 0.503142237663269, "learning_rate": 0.00014457831325301204, "loss": 2.1275, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.8730847239494324, "eval_runtime": 210.0237, "eval_samples_per_second": 22.436, "eval_steps_per_second": 0.705, "step": 40 }, { "epoch": 0.08750341810226962, "grad_norm": 0.5874570608139038, "learning_rate": 0.0002891566265060241, "loss": 0.5607, "step": 80 }, { "epoch": 0.08750341810226962, "eval_loss": 0.9147108197212219, "eval_runtime": 210.6077, "eval_samples_per_second": 22.373, "eval_steps_per_second": 0.703, "step": 80 }, { "epoch": 0.13125512715340443, "grad_norm": 0.6330702900886536, "learning_rate": 0.00029985669589905196, "loss": 0.4772, "step": 120 }, { "epoch": 0.13125512715340443, "eval_loss": 0.8467113971710205, "eval_runtime": 210.5882, "eval_samples_per_second": 22.375, "eval_steps_per_second": 0.703, "step": 120 }, { "epoch": 0.17500683620453925, "grad_norm": 0.3914893865585327, "learning_rate": 0.0002993796936474492, "loss": 0.4536, "step": 160 }, { "epoch": 0.17500683620453925, "eval_loss": 0.83160400390625, "eval_runtime": 210.0873, "eval_samples_per_second": 22.429, "eval_steps_per_second": 0.704, "step": 160 }, { "epoch": 0.21875854525567404, "grad_norm": 0.3717311918735504, "learning_rate": 0.00029856911617379416, "loss": 0.4388, "step": 200 }, { "epoch": 0.21875854525567404, "eval_loss": 0.8363510370254517, "eval_runtime": 210.5622, "eval_samples_per_second": 22.378, "eval_steps_per_second": 0.703, "step": 200 }, { "epoch": 0.26251025430680885, "grad_norm": 0.3580843508243561, "learning_rate": 0.00029742677355383513, "loss": 0.4156, "step": 240 }, { "epoch": 0.26251025430680885, "eval_loss": 0.8319957852363586, "eval_runtime": 210.5767, "eval_samples_per_second": 22.377, "eval_steps_per_second": 0.703, "step": 240 }, { "epoch": 0.30626196335794365, "grad_norm": 0.43683090806007385, "learning_rate": 0.0002959552167179149, "loss": 0.4083, "step": 280 }, { "epoch": 0.30626196335794365, "eval_loss": 0.859396755695343, "eval_runtime": 210.6094, "eval_samples_per_second": 22.373, "eval_steps_per_second": 0.703, "step": 280 }, { "epoch": 0.3500136724090785, "grad_norm": 0.3830896317958832, "learning_rate": 0.00029415773175456614, "loss": 0.397, "step": 320 }, { "epoch": 0.3500136724090785, "eval_loss": 0.859840989112854, "eval_runtime": 210.7225, "eval_samples_per_second": 22.361, "eval_steps_per_second": 0.702, "step": 320 }, { "epoch": 0.3937653814602133, "grad_norm": 0.5180444717407227, "learning_rate": 0.0002920383325724476, "loss": 0.3887, "step": 360 }, { "epoch": 0.3937653814602133, "eval_loss": 0.890067458152771, "eval_runtime": 210.5277, "eval_samples_per_second": 22.382, "eval_steps_per_second": 0.703, "step": 360 }, { "epoch": 0.3937653814602133, "step": 360, "total_flos": 4.90773729509376e+17, "train_loss": 0.6297279569837783, "train_runtime": 17337.0867, "train_samples_per_second": 40.498, "train_steps_per_second": 0.158 } ], "logging_steps": 40, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 40, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.90773729509376e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }