{ "best_metric": 0.5073133111000061, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/Qwen/Qwen1.5_1.8B_amazon/checkpoint-350", "epoch": 1.0, "eval_steps": 50, "global_step": 380, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 129.14089965820312, "learning_rate": 4.8684210526315795e-06, "loss": 7.3844, "step": 10 }, { "epoch": 0.05, "grad_norm": 123.16605377197266, "learning_rate": 4.736842105263158e-06, "loss": 4.8281, "step": 20 }, { "epoch": 0.08, "grad_norm": 73.3310546875, "learning_rate": 4.605263157894737e-06, "loss": 3.2703, "step": 30 }, { "epoch": 0.11, "grad_norm": 79.52980041503906, "learning_rate": 4.473684210526316e-06, "loss": 2.0736, "step": 40 }, { "epoch": 0.13, "grad_norm": 65.27238464355469, "learning_rate": 4.342105263157895e-06, "loss": 1.5641, "step": 50 }, { "epoch": 0.13, "eval_accuracy": 0.647562582345191, "eval_f1_macro": 0.5588571972473421, "eval_f1_micro": 0.647562582345191, "eval_loss": 1.2904983758926392, "eval_runtime": 4.0513, "eval_samples_per_second": 374.697, "eval_steps_per_second": 11.848, "step": 50 }, { "epoch": 0.16, "grad_norm": 50.00752258300781, "learning_rate": 4.210526315789474e-06, "loss": 1.1144, "step": 60 }, { "epoch": 0.18, "grad_norm": 39.85783386230469, "learning_rate": 4.078947368421053e-06, "loss": 1.1287, "step": 70 }, { "epoch": 0.21, "grad_norm": 62.46803665161133, "learning_rate": 3.947368421052632e-06, "loss": 0.8914, "step": 80 }, { "epoch": 0.24, "grad_norm": 42.484432220458984, "learning_rate": 3.815789473684211e-06, "loss": 0.8281, "step": 90 }, { "epoch": 0.26, "grad_norm": 48.54948043823242, "learning_rate": 3.6842105263157896e-06, "loss": 0.744, "step": 100 }, { "epoch": 0.26, "eval_accuracy": 0.769433465085639, "eval_f1_macro": 0.716629286760697, "eval_f1_micro": 0.769433465085639, "eval_loss": 0.7993608117103577, "eval_runtime": 4.0452, "eval_samples_per_second": 375.256, "eval_steps_per_second": 11.866, "step": 100 }, { "epoch": 0.29, "grad_norm": 47.28898239135742, "learning_rate": 3.5526315789473687e-06, "loss": 0.9268, "step": 110 }, { "epoch": 0.32, "grad_norm": 49.59343338012695, "learning_rate": 3.421052631578948e-06, "loss": 0.7649, "step": 120 }, { "epoch": 0.34, "grad_norm": 36.2591438293457, "learning_rate": 3.289473684210527e-06, "loss": 0.716, "step": 130 }, { "epoch": 0.37, "grad_norm": 47.06060791015625, "learning_rate": 3.157894736842105e-06, "loss": 0.5508, "step": 140 }, { "epoch": 0.39, "grad_norm": 45.960975646972656, "learning_rate": 3.0263157894736843e-06, "loss": 0.7245, "step": 150 }, { "epoch": 0.39, "eval_accuracy": 0.7997364953886693, "eval_f1_macro": 0.744994161609646, "eval_f1_micro": 0.7997364953886693, "eval_loss": 0.6845870614051819, "eval_runtime": 4.0142, "eval_samples_per_second": 378.16, "eval_steps_per_second": 11.958, "step": 150 }, { "epoch": 0.42, "grad_norm": 49.28917694091797, "learning_rate": 2.8947368421052634e-06, "loss": 0.6837, "step": 160 }, { "epoch": 0.45, "grad_norm": 34.09211349487305, "learning_rate": 2.7631578947368424e-06, "loss": 0.6589, "step": 170 }, { "epoch": 0.47, "grad_norm": 38.694705963134766, "learning_rate": 2.631578947368421e-06, "loss": 0.6702, "step": 180 }, { "epoch": 0.5, "grad_norm": 64.92501068115234, "learning_rate": 2.5e-06, "loss": 0.7353, "step": 190 }, { "epoch": 0.53, "grad_norm": 39.18191909790039, "learning_rate": 2.368421052631579e-06, "loss": 0.6491, "step": 200 }, { "epoch": 0.53, "eval_accuracy": 0.8155467720685112, "eval_f1_macro": 0.7679142075529768, "eval_f1_micro": 0.8155467720685112, "eval_loss": 0.640599250793457, "eval_runtime": 4.0408, "eval_samples_per_second": 375.669, "eval_steps_per_second": 11.879, "step": 200 }, { "epoch": 0.55, "grad_norm": 39.82322311401367, "learning_rate": 2.236842105263158e-06, "loss": 0.545, "step": 210 }, { "epoch": 0.58, "grad_norm": 60.72475051879883, "learning_rate": 2.105263157894737e-06, "loss": 0.5595, "step": 220 }, { "epoch": 0.61, "grad_norm": 42.45864486694336, "learning_rate": 1.973684210526316e-06, "loss": 0.6386, "step": 230 }, { "epoch": 0.63, "grad_norm": 48.077884674072266, "learning_rate": 1.8421052631578948e-06, "loss": 0.649, "step": 240 }, { "epoch": 0.66, "grad_norm": 48.596435546875, "learning_rate": 1.710526315789474e-06, "loss": 0.6193, "step": 250 }, { "epoch": 0.66, "eval_accuracy": 0.8399209486166008, "eval_f1_macro": 0.7970428846372188, "eval_f1_micro": 0.8399209486166008, "eval_loss": 0.5427243113517761, "eval_runtime": 4.3118, "eval_samples_per_second": 352.055, "eval_steps_per_second": 11.132, "step": 250 }, { "epoch": 0.68, "grad_norm": 45.50446319580078, "learning_rate": 1.5789473684210526e-06, "loss": 0.6343, "step": 260 }, { "epoch": 0.71, "grad_norm": 40.66709518432617, "learning_rate": 1.4473684210526317e-06, "loss": 0.5493, "step": 270 }, { "epoch": 0.74, "grad_norm": 50.266971588134766, "learning_rate": 1.3157894736842106e-06, "loss": 0.5177, "step": 280 }, { "epoch": 0.76, "grad_norm": 41.21518325805664, "learning_rate": 1.1842105263157894e-06, "loss": 0.4751, "step": 290 }, { "epoch": 0.79, "grad_norm": 35.079444885253906, "learning_rate": 1.0526315789473685e-06, "loss": 0.4828, "step": 300 }, { "epoch": 0.79, "eval_accuracy": 0.836627140974967, "eval_f1_macro": 0.8114023231734937, "eval_f1_micro": 0.836627140974967, "eval_loss": 0.5453078746795654, "eval_runtime": 4.0339, "eval_samples_per_second": 376.309, "eval_steps_per_second": 11.899, "step": 300 }, { "epoch": 0.82, "grad_norm": 51.024139404296875, "learning_rate": 9.210526315789474e-07, "loss": 0.4726, "step": 310 }, { "epoch": 0.84, "grad_norm": 41.68681335449219, "learning_rate": 7.894736842105263e-07, "loss": 0.5492, "step": 320 }, { "epoch": 0.87, "grad_norm": 31.459598541259766, "learning_rate": 6.578947368421053e-07, "loss": 0.6464, "step": 330 }, { "epoch": 0.89, "grad_norm": 37.8900260925293, "learning_rate": 5.263157894736843e-07, "loss": 0.4235, "step": 340 }, { "epoch": 0.92, "grad_norm": 80.35220336914062, "learning_rate": 3.9473684210526315e-07, "loss": 0.6122, "step": 350 }, { "epoch": 0.92, "eval_accuracy": 0.852437417654809, "eval_f1_macro": 0.8289709215944268, "eval_f1_micro": 0.852437417654809, "eval_loss": 0.5073133111000061, "eval_runtime": 4.0684, "eval_samples_per_second": 373.116, "eval_steps_per_second": 11.798, "step": 350 }, { "epoch": 0.95, "grad_norm": 35.06690979003906, "learning_rate": 2.6315789473684213e-07, "loss": 0.4292, "step": 360 }, { "epoch": 0.97, "grad_norm": 46.45317077636719, "learning_rate": 1.3157894736842107e-07, "loss": 0.5656, "step": 370 }, { "epoch": 1.0, "grad_norm": 42.95165252685547, "learning_rate": 0.0, "loss": 0.5355, "step": 380 }, { "epoch": 1.0, "step": 380, "total_flos": 1.13424986013696e+16, "train_loss": 1.0740083393297697, "train_runtime": 415.974, "train_samples_per_second": 29.194, "train_steps_per_second": 0.914 } ], "logging_steps": 10, "max_steps": 380, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "total_flos": 1.13424986013696e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }