{ "best_metric": 0.42495009303092957, "best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/amazon_attrprompt/checkpoint-350", "epoch": 3.0, "eval_steps": 50, "global_step": 1140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 88.5622329711914, "learning_rate": 4.956140350877193e-05, "loss": 9.8375, "step": 10 }, { "epoch": 0.05, "grad_norm": 76.16983032226562, "learning_rate": 4.912280701754386e-05, "loss": 6.3422, "step": 20 }, { "epoch": 0.08, "grad_norm": 68.94097137451172, "learning_rate": 4.868421052631579e-05, "loss": 3.866, "step": 30 }, { "epoch": 0.11, "grad_norm": 72.53247833251953, "learning_rate": 4.824561403508772e-05, "loss": 2.1033, "step": 40 }, { "epoch": 0.13, "grad_norm": 67.55592346191406, "learning_rate": 4.780701754385965e-05, "loss": 1.6833, "step": 50 }, { "epoch": 0.13, "eval_accuracy": 0.6640316205533597, "eval_f1_macro": 0.5878963576218975, "eval_f1_micro": 0.6640316205533597, "eval_loss": 1.2279417514801025, "eval_runtime": 19.0551, "eval_samples_per_second": 79.664, "eval_steps_per_second": 2.519, "step": 50 }, { "epoch": 0.16, "grad_norm": 34.1900520324707, "learning_rate": 4.736842105263158e-05, "loss": 1.0584, "step": 60 }, { "epoch": 0.18, "grad_norm": 48.36394119262695, "learning_rate": 4.6929824561403515e-05, "loss": 1.0168, "step": 70 }, { "epoch": 0.21, "grad_norm": 46.12382888793945, "learning_rate": 4.649122807017544e-05, "loss": 0.8845, "step": 80 }, { "epoch": 0.24, "grad_norm": 43.66932678222656, "learning_rate": 4.605263157894737e-05, "loss": 0.8027, "step": 90 }, { "epoch": 0.26, "grad_norm": 28.10245132446289, "learning_rate": 4.56140350877193e-05, "loss": 0.6531, "step": 100 }, { "epoch": 0.26, "eval_accuracy": 0.8155467720685112, "eval_f1_macro": 0.7766571443103144, "eval_f1_micro": 0.8155467720685112, "eval_loss": 0.6577733755111694, "eval_runtime": 19.1186, "eval_samples_per_second": 79.399, "eval_steps_per_second": 2.511, "step": 100 }, { "epoch": 0.29, "grad_norm": 59.69050216674805, "learning_rate": 4.517543859649123e-05, "loss": 0.8677, "step": 110 }, { "epoch": 0.32, "grad_norm": 56.882625579833984, "learning_rate": 4.473684210526316e-05, "loss": 0.7174, "step": 120 }, { "epoch": 0.34, "grad_norm": 29.96848487854004, "learning_rate": 4.429824561403509e-05, "loss": 0.5984, "step": 130 }, { "epoch": 0.37, "grad_norm": 53.101444244384766, "learning_rate": 4.3859649122807014e-05, "loss": 0.5318, "step": 140 }, { "epoch": 0.39, "grad_norm": 30.550228118896484, "learning_rate": 4.342105263157895e-05, "loss": 0.6075, "step": 150 }, { "epoch": 0.39, "eval_accuracy": 0.8326745718050066, "eval_f1_macro": 0.8113232244484544, "eval_f1_micro": 0.8326745718050066, "eval_loss": 0.5934926867485046, "eval_runtime": 19.1445, "eval_samples_per_second": 79.292, "eval_steps_per_second": 2.507, "step": 150 }, { "epoch": 0.42, "grad_norm": 47.42914581298828, "learning_rate": 4.298245614035088e-05, "loss": 0.7186, "step": 160 }, { "epoch": 0.45, "grad_norm": 34.166526794433594, "learning_rate": 4.254385964912281e-05, "loss": 0.7026, "step": 170 }, { "epoch": 0.47, "grad_norm": 28.214841842651367, "learning_rate": 4.210526315789474e-05, "loss": 0.6292, "step": 180 }, { "epoch": 0.5, "grad_norm": 45.511444091796875, "learning_rate": 4.166666666666667e-05, "loss": 0.7115, "step": 190 }, { "epoch": 0.53, "grad_norm": 18.114608764648438, "learning_rate": 4.12280701754386e-05, "loss": 0.5646, "step": 200 }, { "epoch": 0.53, "eval_accuracy": 0.8379446640316206, "eval_f1_macro": 0.8193854450487876, "eval_f1_micro": 0.8379446640316206, "eval_loss": 0.5659688115119934, "eval_runtime": 19.1468, "eval_samples_per_second": 79.282, "eval_steps_per_second": 2.507, "step": 200 }, { "epoch": 0.55, "grad_norm": 29.09646987915039, "learning_rate": 4.078947368421053e-05, "loss": 0.4866, "step": 210 }, { "epoch": 0.58, "grad_norm": 44.29309844970703, "learning_rate": 4.0350877192982455e-05, "loss": 0.4314, "step": 220 }, { "epoch": 0.61, "grad_norm": 42.93623733520508, "learning_rate": 3.991228070175439e-05, "loss": 0.7159, "step": 230 }, { "epoch": 0.63, "grad_norm": 33.71025848388672, "learning_rate": 3.9473684210526316e-05, "loss": 0.6136, "step": 240 }, { "epoch": 0.66, "grad_norm": 42.2908821105957, "learning_rate": 3.9035087719298244e-05, "loss": 0.6148, "step": 250 }, { "epoch": 0.66, "eval_accuracy": 0.8425559947299077, "eval_f1_macro": 0.8319363334583545, "eval_f1_micro": 0.8425559947299077, "eval_loss": 0.531785249710083, "eval_runtime": 19.145, "eval_samples_per_second": 79.29, "eval_steps_per_second": 2.507, "step": 250 }, { "epoch": 0.68, "grad_norm": 38.71971130371094, "learning_rate": 3.859649122807018e-05, "loss": 0.582, "step": 260 }, { "epoch": 0.71, "grad_norm": 21.194923400878906, "learning_rate": 3.815789473684211e-05, "loss": 0.5039, "step": 270 }, { "epoch": 0.74, "grad_norm": 47.179935455322266, "learning_rate": 3.771929824561404e-05, "loss": 0.5564, "step": 280 }, { "epoch": 0.76, "grad_norm": 34.193275451660156, "learning_rate": 3.728070175438597e-05, "loss": 0.452, "step": 290 }, { "epoch": 0.79, "grad_norm": 20.546531677246094, "learning_rate": 3.6842105263157895e-05, "loss": 0.4047, "step": 300 }, { "epoch": 0.79, "eval_accuracy": 0.8649538866930171, "eval_f1_macro": 0.8467453346863307, "eval_f1_micro": 0.8649538866930171, "eval_loss": 0.4545969069004059, "eval_runtime": 19.151, "eval_samples_per_second": 79.265, "eval_steps_per_second": 2.506, "step": 300 }, { "epoch": 0.82, "grad_norm": 46.09516525268555, "learning_rate": 3.640350877192983e-05, "loss": 0.4348, "step": 310 }, { "epoch": 0.84, "grad_norm": 35.31401443481445, "learning_rate": 3.5964912280701756e-05, "loss": 0.5397, "step": 320 }, { "epoch": 0.87, "grad_norm": 22.361942291259766, "learning_rate": 3.5526315789473684e-05, "loss": 0.5323, "step": 330 }, { "epoch": 0.89, "grad_norm": 42.461910247802734, "learning_rate": 3.508771929824561e-05, "loss": 0.5028, "step": 340 }, { "epoch": 0.92, "grad_norm": 35.93864059448242, "learning_rate": 3.4649122807017546e-05, "loss": 0.568, "step": 350 }, { "epoch": 0.92, "eval_accuracy": 0.8708827404479579, "eval_f1_macro": 0.8540867659285116, "eval_f1_micro": 0.8708827404479579, "eval_loss": 0.42495009303092957, "eval_runtime": 19.1783, "eval_samples_per_second": 79.152, "eval_steps_per_second": 2.503, "step": 350 }, { "epoch": 0.95, "grad_norm": 19.8919734954834, "learning_rate": 3.421052631578947e-05, "loss": 0.3863, "step": 360 }, { "epoch": 0.97, "grad_norm": 46.49361801147461, "learning_rate": 3.377192982456141e-05, "loss": 0.5599, "step": 370 }, { "epoch": 1.0, "grad_norm": 31.97792625427246, "learning_rate": 3.3333333333333335e-05, "loss": 0.5042, "step": 380 }, { "epoch": 1.03, "grad_norm": 30.975046157836914, "learning_rate": 3.289473684210527e-05, "loss": 0.3024, "step": 390 }, { "epoch": 1.05, "grad_norm": 14.051041603088379, "learning_rate": 3.24561403508772e-05, "loss": 0.2395, "step": 400 }, { "epoch": 1.05, "eval_accuracy": 0.8761528326745718, "eval_f1_macro": 0.8611335072766768, "eval_f1_micro": 0.8761528326745718, "eval_loss": 0.4569604992866516, "eval_runtime": 19.1561, "eval_samples_per_second": 79.244, "eval_steps_per_second": 2.506, "step": 400 }, { "epoch": 1.08, "grad_norm": 28.197834014892578, "learning_rate": 3.2017543859649124e-05, "loss": 0.2336, "step": 410 }, { "epoch": 1.11, "grad_norm": 17.73228645324707, "learning_rate": 3.157894736842105e-05, "loss": 0.2069, "step": 420 }, { "epoch": 1.13, "grad_norm": 19.042102813720703, "learning_rate": 3.1140350877192986e-05, "loss": 0.3182, "step": 430 }, { "epoch": 1.16, "grad_norm": 17.37108039855957, "learning_rate": 3.0701754385964913e-05, "loss": 0.2071, "step": 440 }, { "epoch": 1.18, "grad_norm": 17.802289962768555, "learning_rate": 3.0263157894736844e-05, "loss": 0.2213, "step": 450 }, { "epoch": 1.18, "eval_accuracy": 0.8774703557312253, "eval_f1_macro": 0.8631213514149485, "eval_f1_micro": 0.8774703557312253, "eval_loss": 0.45242956280708313, "eval_runtime": 19.1508, "eval_samples_per_second": 79.266, "eval_steps_per_second": 2.506, "step": 450 }, { "epoch": 1.21, "grad_norm": 18.258426666259766, "learning_rate": 2.9824561403508772e-05, "loss": 0.2722, "step": 460 }, { "epoch": 1.24, "grad_norm": 24.840511322021484, "learning_rate": 2.9385964912280706e-05, "loss": 0.2721, "step": 470 }, { "epoch": 1.26, "grad_norm": 14.744382858276367, "learning_rate": 2.8947368421052634e-05, "loss": 0.2501, "step": 480 }, { "epoch": 1.29, "grad_norm": 18.21392250061035, "learning_rate": 2.850877192982456e-05, "loss": 0.2516, "step": 490 }, { "epoch": 1.32, "grad_norm": 7.454222679138184, "learning_rate": 2.8070175438596492e-05, "loss": 0.1778, "step": 500 }, { "epoch": 1.32, "eval_accuracy": 0.8748353096179183, "eval_f1_macro": 0.8507965490074818, "eval_f1_micro": 0.8748353096179183, "eval_loss": 0.4648575782775879, "eval_runtime": 19.1651, "eval_samples_per_second": 79.207, "eval_steps_per_second": 2.505, "step": 500 }, { "epoch": 1.34, "grad_norm": 15.366374969482422, "learning_rate": 2.7631578947368426e-05, "loss": 0.1821, "step": 510 }, { "epoch": 1.37, "grad_norm": 17.506078720092773, "learning_rate": 2.7192982456140354e-05, "loss": 0.2568, "step": 520 }, { "epoch": 1.39, "grad_norm": 32.42897033691406, "learning_rate": 2.675438596491228e-05, "loss": 0.2519, "step": 530 }, { "epoch": 1.42, "grad_norm": 16.54423713684082, "learning_rate": 2.6315789473684212e-05, "loss": 0.1991, "step": 540 }, { "epoch": 1.45, "grad_norm": 13.094934463500977, "learning_rate": 2.5877192982456143e-05, "loss": 0.1738, "step": 550 }, { "epoch": 1.45, "eval_accuracy": 0.8794466403162056, "eval_f1_macro": 0.8616811925870786, "eval_f1_micro": 0.8794466403162056, "eval_loss": 0.4853415787220001, "eval_runtime": 19.1386, "eval_samples_per_second": 79.316, "eval_steps_per_second": 2.508, "step": 550 }, { "epoch": 1.47, "grad_norm": 9.557943344116211, "learning_rate": 2.5438596491228074e-05, "loss": 0.3554, "step": 560 }, { "epoch": 1.5, "grad_norm": 20.867530822753906, "learning_rate": 2.5e-05, "loss": 0.2558, "step": 570 }, { "epoch": 1.53, "grad_norm": 16.902305603027344, "learning_rate": 2.456140350877193e-05, "loss": 0.2638, "step": 580 }, { "epoch": 1.55, "grad_norm": 14.376564979553223, "learning_rate": 2.412280701754386e-05, "loss": 0.1744, "step": 590 }, { "epoch": 1.58, "grad_norm": 21.982099533081055, "learning_rate": 2.368421052631579e-05, "loss": 0.2643, "step": 600 }, { "epoch": 1.58, "eval_accuracy": 0.8827404479578392, "eval_f1_macro": 0.8675629871629298, "eval_f1_micro": 0.8827404479578392, "eval_loss": 0.43024396896362305, "eval_runtime": 19.1559, "eval_samples_per_second": 79.244, "eval_steps_per_second": 2.506, "step": 600 }, { "epoch": 1.61, "grad_norm": 8.151387214660645, "learning_rate": 2.324561403508772e-05, "loss": 0.2679, "step": 610 }, { "epoch": 1.63, "grad_norm": 22.84608268737793, "learning_rate": 2.280701754385965e-05, "loss": 0.2687, "step": 620 }, { "epoch": 1.66, "grad_norm": 18.665374755859375, "learning_rate": 2.236842105263158e-05, "loss": 0.1821, "step": 630 }, { "epoch": 1.68, "grad_norm": 16.556060791015625, "learning_rate": 2.1929824561403507e-05, "loss": 0.2583, "step": 640 }, { "epoch": 1.71, "grad_norm": 10.120057106018066, "learning_rate": 2.149122807017544e-05, "loss": 0.3357, "step": 650 }, { "epoch": 1.71, "eval_accuracy": 0.8827404479578392, "eval_f1_macro": 0.8673002865339278, "eval_f1_micro": 0.8827404479578392, "eval_loss": 0.43883296847343445, "eval_runtime": 19.1504, "eval_samples_per_second": 79.267, "eval_steps_per_second": 2.506, "step": 650 }, { "epoch": 1.74, "grad_norm": 27.886629104614258, "learning_rate": 2.105263157894737e-05, "loss": 0.2225, "step": 660 }, { "epoch": 1.76, "grad_norm": 17.229507446289062, "learning_rate": 2.06140350877193e-05, "loss": 0.2053, "step": 670 }, { "epoch": 1.79, "grad_norm": 33.89767837524414, "learning_rate": 2.0175438596491227e-05, "loss": 0.223, "step": 680 }, { "epoch": 1.82, "grad_norm": 12.045727729797363, "learning_rate": 1.9736842105263158e-05, "loss": 0.1707, "step": 690 }, { "epoch": 1.84, "grad_norm": 19.970233917236328, "learning_rate": 1.929824561403509e-05, "loss": 0.3029, "step": 700 }, { "epoch": 1.84, "eval_accuracy": 0.8827404479578392, "eval_f1_macro": 0.8655521627258196, "eval_f1_micro": 0.8827404479578392, "eval_loss": 0.4430885314941406, "eval_runtime": 19.1338, "eval_samples_per_second": 79.336, "eval_steps_per_second": 2.509, "step": 700 }, { "epoch": 1.87, "grad_norm": 16.294740676879883, "learning_rate": 1.885964912280702e-05, "loss": 0.222, "step": 710 }, { "epoch": 1.89, "grad_norm": 12.050216674804688, "learning_rate": 1.8421052631578947e-05, "loss": 0.2102, "step": 720 }, { "epoch": 1.92, "grad_norm": 21.319595336914062, "learning_rate": 1.7982456140350878e-05, "loss": 0.1974, "step": 730 }, { "epoch": 1.95, "grad_norm": 9.788517951965332, "learning_rate": 1.7543859649122806e-05, "loss": 0.2275, "step": 740 }, { "epoch": 1.97, "grad_norm": 19.825088500976562, "learning_rate": 1.7105263157894737e-05, "loss": 0.1809, "step": 750 }, { "epoch": 1.97, "eval_accuracy": 0.8899868247694335, "eval_f1_macro": 0.874229892393047, "eval_f1_micro": 0.8899868247694335, "eval_loss": 0.42660534381866455, "eval_runtime": 19.1466, "eval_samples_per_second": 79.283, "eval_steps_per_second": 2.507, "step": 750 }, { "epoch": 2.0, "grad_norm": 14.426370620727539, "learning_rate": 1.6666666666666667e-05, "loss": 0.1963, "step": 760 }, { "epoch": 2.03, "grad_norm": 17.27349281311035, "learning_rate": 1.62280701754386e-05, "loss": 0.1066, "step": 770 }, { "epoch": 2.05, "grad_norm": 3.37688946723938, "learning_rate": 1.5789473684210526e-05, "loss": 0.064, "step": 780 }, { "epoch": 2.08, "grad_norm": 6.233172416687012, "learning_rate": 1.5350877192982457e-05, "loss": 0.0603, "step": 790 }, { "epoch": 2.11, "grad_norm": 17.858177185058594, "learning_rate": 1.4912280701754386e-05, "loss": 0.0589, "step": 800 }, { "epoch": 2.11, "eval_accuracy": 0.8945981554677207, "eval_f1_macro": 0.8814951057007789, "eval_f1_micro": 0.8945981554677207, "eval_loss": 0.4498850703239441, "eval_runtime": 19.1446, "eval_samples_per_second": 79.291, "eval_steps_per_second": 2.507, "step": 800 }, { "epoch": 2.13, "grad_norm": 10.511329650878906, "learning_rate": 1.4473684210526317e-05, "loss": 0.0526, "step": 810 }, { "epoch": 2.16, "grad_norm": 3.655291795730591, "learning_rate": 1.4035087719298246e-05, "loss": 0.0308, "step": 820 }, { "epoch": 2.18, "grad_norm": 14.438605308532715, "learning_rate": 1.3596491228070177e-05, "loss": 0.063, "step": 830 }, { "epoch": 2.21, "grad_norm": 5.1180195808410645, "learning_rate": 1.3157894736842106e-05, "loss": 0.0297, "step": 840 }, { "epoch": 2.24, "grad_norm": 9.131390571594238, "learning_rate": 1.2719298245614037e-05, "loss": 0.0531, "step": 850 }, { "epoch": 2.24, "eval_accuracy": 0.8919631093544137, "eval_f1_macro": 0.8757800595234113, "eval_f1_micro": 0.8919631093544137, "eval_loss": 0.475754976272583, "eval_runtime": 19.1667, "eval_samples_per_second": 79.2, "eval_steps_per_second": 2.504, "step": 850 }, { "epoch": 2.26, "grad_norm": 0.74644935131073, "learning_rate": 1.2280701754385964e-05, "loss": 0.0649, "step": 860 }, { "epoch": 2.29, "grad_norm": 9.349382400512695, "learning_rate": 1.1842105263157895e-05, "loss": 0.0213, "step": 870 }, { "epoch": 2.32, "grad_norm": 11.547332763671875, "learning_rate": 1.1403508771929824e-05, "loss": 0.0526, "step": 880 }, { "epoch": 2.34, "grad_norm": 4.113059043884277, "learning_rate": 1.0964912280701754e-05, "loss": 0.0187, "step": 890 }, { "epoch": 2.37, "grad_norm": 3.626723527908325, "learning_rate": 1.0526315789473684e-05, "loss": 0.0234, "step": 900 }, { "epoch": 2.37, "eval_accuracy": 0.8952569169960475, "eval_f1_macro": 0.8803631688376218, "eval_f1_micro": 0.8952569169960475, "eval_loss": 0.47881799936294556, "eval_runtime": 19.145, "eval_samples_per_second": 79.29, "eval_steps_per_second": 2.507, "step": 900 }, { "epoch": 2.39, "grad_norm": 11.01301383972168, "learning_rate": 1.0087719298245614e-05, "loss": 0.0655, "step": 910 }, { "epoch": 2.42, "grad_norm": 11.407805442810059, "learning_rate": 9.649122807017545e-06, "loss": 0.027, "step": 920 }, { "epoch": 2.45, "grad_norm": 1.768430471420288, "learning_rate": 9.210526315789474e-06, "loss": 0.0518, "step": 930 }, { "epoch": 2.47, "grad_norm": 20.94036102294922, "learning_rate": 8.771929824561403e-06, "loss": 0.0359, "step": 940 }, { "epoch": 2.5, "grad_norm": 1.7965754270553589, "learning_rate": 8.333333333333334e-06, "loss": 0.0145, "step": 950 }, { "epoch": 2.5, "eval_accuracy": 0.8939393939393939, "eval_f1_macro": 0.877949638322673, "eval_f1_micro": 0.8939393939393939, "eval_loss": 0.49758803844451904, "eval_runtime": 19.1473, "eval_samples_per_second": 79.28, "eval_steps_per_second": 2.507, "step": 950 }, { "epoch": 2.53, "grad_norm": 0.1458648443222046, "learning_rate": 7.894736842105263e-06, "loss": 0.0425, "step": 960 }, { "epoch": 2.55, "grad_norm": 11.682854652404785, "learning_rate": 7.456140350877193e-06, "loss": 0.0851, "step": 970 }, { "epoch": 2.58, "grad_norm": 1.2510184049606323, "learning_rate": 7.017543859649123e-06, "loss": 0.0299, "step": 980 }, { "epoch": 2.61, "grad_norm": 10.715560913085938, "learning_rate": 6.578947368421053e-06, "loss": 0.0678, "step": 990 }, { "epoch": 2.63, "grad_norm": 11.586739540100098, "learning_rate": 6.140350877192982e-06, "loss": 0.058, "step": 1000 }, { "epoch": 2.63, "eval_accuracy": 0.8992094861660079, "eval_f1_macro": 0.8816426015432036, "eval_f1_micro": 0.8992094861660079, "eval_loss": 0.4966810941696167, "eval_runtime": 19.17, "eval_samples_per_second": 79.186, "eval_steps_per_second": 2.504, "step": 1000 }, { "epoch": 2.66, "grad_norm": 1.5098384618759155, "learning_rate": 5.701754385964912e-06, "loss": 0.0287, "step": 1010 }, { "epoch": 2.68, "grad_norm": 6.79250955581665, "learning_rate": 5.263157894736842e-06, "loss": 0.0409, "step": 1020 }, { "epoch": 2.71, "grad_norm": 2.9859375953674316, "learning_rate": 4.824561403508772e-06, "loss": 0.0379, "step": 1030 }, { "epoch": 2.74, "grad_norm": 14.098068237304688, "learning_rate": 4.3859649122807014e-06, "loss": 0.046, "step": 1040 }, { "epoch": 2.76, "grad_norm": 10.598516464233398, "learning_rate": 3.9473684210526315e-06, "loss": 0.05, "step": 1050 }, { "epoch": 2.76, "eval_accuracy": 0.8932806324110671, "eval_f1_macro": 0.8752583058969902, "eval_f1_micro": 0.8932806324110671, "eval_loss": 0.5112892389297485, "eval_runtime": 19.1422, "eval_samples_per_second": 79.301, "eval_steps_per_second": 2.508, "step": 1050 }, { "epoch": 2.79, "grad_norm": 0.23401279747486115, "learning_rate": 3.5087719298245615e-06, "loss": 0.0655, "step": 1060 }, { "epoch": 2.82, "grad_norm": 4.760624885559082, "learning_rate": 3.070175438596491e-06, "loss": 0.0659, "step": 1070 }, { "epoch": 2.84, "grad_norm": 1.6583623886108398, "learning_rate": 2.631578947368421e-06, "loss": 0.0429, "step": 1080 }, { "epoch": 2.87, "grad_norm": 0.6040318608283997, "learning_rate": 2.1929824561403507e-06, "loss": 0.0168, "step": 1090 }, { "epoch": 2.89, "grad_norm": 11.433518409729004, "learning_rate": 1.7543859649122807e-06, "loss": 0.0556, "step": 1100 }, { "epoch": 2.89, "eval_accuracy": 0.8965744400527009, "eval_f1_macro": 0.8803498700160407, "eval_f1_micro": 0.8965744400527009, "eval_loss": 0.502357542514801, "eval_runtime": 19.1404, "eval_samples_per_second": 79.309, "eval_steps_per_second": 2.508, "step": 1100 }, { "epoch": 2.92, "grad_norm": 17.50218963623047, "learning_rate": 1.3157894736842106e-06, "loss": 0.0795, "step": 1110 }, { "epoch": 2.95, "grad_norm": 2.289309501647949, "learning_rate": 8.771929824561404e-07, "loss": 0.0646, "step": 1120 }, { "epoch": 2.97, "grad_norm": 16.876842498779297, "learning_rate": 4.385964912280702e-07, "loss": 0.0431, "step": 1130 }, { "epoch": 3.0, "grad_norm": 0.3004974126815796, "learning_rate": 0.0, "loss": 0.0419, "step": 1140 }, { "epoch": 3.0, "step": 1140, "total_flos": 1.9041981890455142e+17, "train_loss": 0.48372833394167714, "train_runtime": 1860.5106, "train_samples_per_second": 19.582, "train_steps_per_second": 0.613 } ], "logging_steps": 10, "max_steps": 1140, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 1.9041981890455142e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }