{ "best_metric": 0.35073795914649963, "best_model_checkpoint": "../../saves/LLaMA3-70B-qlora-bnb/lora/sft/A61K-15950_2/checkpoint-200", "epoch": 0.9975308641975309, "eval_steps": 100, "global_step": 202, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04938271604938271, "grad_norm": 46.31583786010742, "learning_rate": 9.523809523809523e-06, "loss": 12.5496, "step": 10 }, { "epoch": 0.09876543209876543, "grad_norm": 23.47727394104004, "learning_rate": 5.714285714285714e-05, "loss": 11.9318, "step": 20 }, { "epoch": 0.14814814814814814, "grad_norm": 25.78252410888672, "learning_rate": 9.999246866958692e-05, "loss": 7.8505, "step": 30 }, { "epoch": 0.19753086419753085, "grad_norm": 13.642712593078613, "learning_rate": 9.909145108993794e-05, "loss": 1.2163, "step": 40 }, { "epoch": 0.24691358024691357, "grad_norm": 10.011795997619629, "learning_rate": 9.67152097716334e-05, "loss": 0.4577, "step": 50 }, { "epoch": 0.2962962962962963, "grad_norm": 10.739720344543457, "learning_rate": 9.29351520070574e-05, "loss": 0.3768, "step": 60 }, { "epoch": 0.345679012345679, "grad_norm": 11.204980850219727, "learning_rate": 8.786487050581583e-05, "loss": 0.4593, "step": 70 }, { "epoch": 0.3950617283950617, "grad_norm": 4.646059513092041, "learning_rate": 8.165672987449962e-05, "loss": 0.4234, "step": 80 }, { "epoch": 0.4444444444444444, "grad_norm": 5.678186893463135, "learning_rate": 7.449728798069864e-05, "loss": 0.3857, "step": 90 }, { "epoch": 0.49382716049382713, "grad_norm": 49.75868225097656, "learning_rate": 6.66016897916682e-05, "loss": 0.3663, "step": 100 }, { "epoch": 0.49382716049382713, "eval_loss": 0.35489073395729065, "eval_runtime": 1317.5223, "eval_samples_per_second": 0.137, "eval_steps_per_second": 0.137, "step": 100 }, { "epoch": 0.5432098765432098, "grad_norm": 2.499194860458374, "learning_rate": 5.820720215572375e-05, "loss": 0.37, "step": 110 }, { "epoch": 0.5925925925925926, "grad_norm": 3.846980571746826, "learning_rate": 4.956608380955877e-05, "loss": 0.3788, "step": 120 }, { "epoch": 0.6419753086419753, "grad_norm": 2.995356321334839, "learning_rate": 4.093800487148857e-05, "loss": 0.401, "step": 130 }, { "epoch": 0.691358024691358, "grad_norm": 4.932299613952637, "learning_rate": 3.258224361880657e-05, "loss": 0.3944, "step": 140 }, { "epoch": 0.7407407407407407, "grad_norm": 4.609350681304932, "learning_rate": 2.474989504016798e-05, "loss": 0.3521, "step": 150 }, { "epoch": 0.7901234567901234, "grad_norm": 3.664435625076294, "learning_rate": 1.7676325300069825e-05, "loss": 0.3425, "step": 160 }, { "epoch": 0.8395061728395061, "grad_norm": 31.691667556762695, "learning_rate": 1.1574098862709992e-05, "loss": 0.3548, "step": 170 }, { "epoch": 0.8888888888888888, "grad_norm": 2.024195909500122, "learning_rate": 6.626590818846162e-06, "loss": 0.3404, "step": 180 }, { "epoch": 0.9382716049382716, "grad_norm": 2.383239507675171, "learning_rate": 2.9824763685681766e-06, "loss": 0.3374, "step": 190 }, { "epoch": 0.9876543209876543, "grad_norm": 2.007232189178467, "learning_rate": 7.512630539036502e-07, "loss": 0.3383, "step": 200 }, { "epoch": 0.9876543209876543, "eval_loss": 0.35073795914649963, "eval_runtime": 1316.6761, "eval_samples_per_second": 0.137, "eval_steps_per_second": 0.137, "step": 200 }, { "epoch": 0.9975308641975309, "step": 202, "total_flos": 8.350412130662744e+18, "train_loss": 1.9651573648547183, "train_runtime": 27220.4722, "train_samples_per_second": 0.06, "train_steps_per_second": 0.007 } 
], "logging_steps": 10, "max_steps": 202, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 8.350412130662744e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }