SLM_vs_LLM_experiments/max_seq_length_128_experiments/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/amazon_attrprompt/trainer_state.json
{
  "best_metric": 0.42495009303092957,
  "best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/amazon_attrprompt/checkpoint-350",
  "epoch": 3.0,
  "eval_steps": 50,
  "global_step": 1140,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "grad_norm": 88.5622329711914,
      "learning_rate": 4.956140350877193e-05,
      "loss": 9.8375,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 76.16983032226562,
      "learning_rate": 4.912280701754386e-05,
      "loss": 6.3422,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 68.94097137451172,
      "learning_rate": 4.868421052631579e-05,
      "loss": 3.866,
      "step": 30
    },
    {
      "epoch": 0.11,
      "grad_norm": 72.53247833251953,
      "learning_rate": 4.824561403508772e-05,
      "loss": 2.1033,
      "step": 40
    },
    {
      "epoch": 0.13,
      "grad_norm": 67.55592346191406,
      "learning_rate": 4.780701754385965e-05,
      "loss": 1.6833,
      "step": 50
    },
    {
      "epoch": 0.13,
      "eval_accuracy": 0.6640316205533597,
      "eval_f1_macro": 0.5878963576218975,
      "eval_f1_micro": 0.6640316205533597,
      "eval_loss": 1.2279417514801025,
      "eval_runtime": 19.0551,
      "eval_samples_per_second": 79.664,
      "eval_steps_per_second": 2.519,
      "step": 50
    },
    {
      "epoch": 0.16,
      "grad_norm": 34.1900520324707,
      "learning_rate": 4.736842105263158e-05,
      "loss": 1.0584,
      "step": 60
    },
    {
      "epoch": 0.18,
      "grad_norm": 48.36394119262695,
      "learning_rate": 4.6929824561403515e-05,
      "loss": 1.0168,
      "step": 70
    },
    {
      "epoch": 0.21,
      "grad_norm": 46.12382888793945,
      "learning_rate": 4.649122807017544e-05,
      "loss": 0.8845,
      "step": 80
    },
    {
      "epoch": 0.24,
      "grad_norm": 43.66932678222656,
      "learning_rate": 4.605263157894737e-05,
      "loss": 0.8027,
      "step": 90
    },
    {
      "epoch": 0.26,
      "grad_norm": 28.10245132446289,
      "learning_rate": 4.56140350877193e-05,
      "loss": 0.6531,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_accuracy": 0.8155467720685112,
      "eval_f1_macro": 0.7766571443103144,
      "eval_f1_micro": 0.8155467720685112,
      "eval_loss": 0.6577733755111694,
      "eval_runtime": 19.1186,
      "eval_samples_per_second": 79.399,
      "eval_steps_per_second": 2.511,
      "step": 100
    },
    {
      "epoch": 0.29,
      "grad_norm": 59.69050216674805,
      "learning_rate": 4.517543859649123e-05,
      "loss": 0.8677,
      "step": 110
    },
    {
      "epoch": 0.32,
      "grad_norm": 56.882625579833984,
      "learning_rate": 4.473684210526316e-05,
      "loss": 0.7174,
      "step": 120
    },
    {
      "epoch": 0.34,
      "grad_norm": 29.96848487854004,
      "learning_rate": 4.429824561403509e-05,
      "loss": 0.5984,
      "step": 130
    },
    {
      "epoch": 0.37,
      "grad_norm": 53.101444244384766,
      "learning_rate": 4.3859649122807014e-05,
      "loss": 0.5318,
      "step": 140
    },
    {
      "epoch": 0.39,
      "grad_norm": 30.550228118896484,
      "learning_rate": 4.342105263157895e-05,
      "loss": 0.6075,
      "step": 150
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.8326745718050066,
      "eval_f1_macro": 0.8113232244484544,
      "eval_f1_micro": 0.8326745718050066,
      "eval_loss": 0.5934926867485046,
      "eval_runtime": 19.1445,
      "eval_samples_per_second": 79.292,
      "eval_steps_per_second": 2.507,
      "step": 150
    },
    {
      "epoch": 0.42,
      "grad_norm": 47.42914581298828,
      "learning_rate": 4.298245614035088e-05,
      "loss": 0.7186,
      "step": 160
    },
    {
      "epoch": 0.45,
      "grad_norm": 34.166526794433594,
      "learning_rate": 4.254385964912281e-05,
      "loss": 0.7026,
      "step": 170
    },
    {
      "epoch": 0.47,
      "grad_norm": 28.214841842651367,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.6292,
      "step": 180
    },
    {
      "epoch": 0.5,
      "grad_norm": 45.511444091796875,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.7115,
      "step": 190
    },
    {
      "epoch": 0.53,
      "grad_norm": 18.114608764648438,
      "learning_rate": 4.12280701754386e-05,
      "loss": 0.5646,
      "step": 200
    },
    {
      "epoch": 0.53,
      "eval_accuracy": 0.8379446640316206,
      "eval_f1_macro": 0.8193854450487876,
      "eval_f1_micro": 0.8379446640316206,
      "eval_loss": 0.5659688115119934,
      "eval_runtime": 19.1468,
      "eval_samples_per_second": 79.282,
      "eval_steps_per_second": 2.507,
      "step": 200
    },
    {
      "epoch": 0.55,
      "grad_norm": 29.09646987915039,
      "learning_rate": 4.078947368421053e-05,
      "loss": 0.4866,
      "step": 210
    },
    {
      "epoch": 0.58,
      "grad_norm": 44.29309844970703,
      "learning_rate": 4.0350877192982455e-05,
      "loss": 0.4314,
      "step": 220
    },
    {
      "epoch": 0.61,
      "grad_norm": 42.93623733520508,
      "learning_rate": 3.991228070175439e-05,
      "loss": 0.7159,
      "step": 230
    },
    {
      "epoch": 0.63,
      "grad_norm": 33.71025848388672,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.6136,
      "step": 240
    },
    {
      "epoch": 0.66,
      "grad_norm": 42.2908821105957,
      "learning_rate": 3.9035087719298244e-05,
      "loss": 0.6148,
      "step": 250
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.8425559947299077,
      "eval_f1_macro": 0.8319363334583545,
      "eval_f1_micro": 0.8425559947299077,
      "eval_loss": 0.531785249710083,
      "eval_runtime": 19.145,
      "eval_samples_per_second": 79.29,
      "eval_steps_per_second": 2.507,
      "step": 250
    },
    {
      "epoch": 0.68,
      "grad_norm": 38.71971130371094,
      "learning_rate": 3.859649122807018e-05,
      "loss": 0.582,
      "step": 260
    },
    {
      "epoch": 0.71,
      "grad_norm": 21.194923400878906,
      "learning_rate": 3.815789473684211e-05,
      "loss": 0.5039,
      "step": 270
    },
    {
      "epoch": 0.74,
      "grad_norm": 47.179935455322266,
      "learning_rate": 3.771929824561404e-05,
      "loss": 0.5564,
      "step": 280
    },
    {
      "epoch": 0.76,
      "grad_norm": 34.193275451660156,
      "learning_rate": 3.728070175438597e-05,
      "loss": 0.452,
      "step": 290
    },
    {
      "epoch": 0.79,
      "grad_norm": 20.546531677246094,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 0.4047,
      "step": 300
    },
    {
      "epoch": 0.79,
      "eval_accuracy": 0.8649538866930171,
      "eval_f1_macro": 0.8467453346863307,
      "eval_f1_micro": 0.8649538866930171,
      "eval_loss": 0.4545969069004059,
      "eval_runtime": 19.151,
      "eval_samples_per_second": 79.265,
      "eval_steps_per_second": 2.506,
      "step": 300
    },
    {
      "epoch": 0.82,
      "grad_norm": 46.09516525268555,
      "learning_rate": 3.640350877192983e-05,
      "loss": 0.4348,
      "step": 310
    },
    {
      "epoch": 0.84,
      "grad_norm": 35.31401443481445,
      "learning_rate": 3.5964912280701756e-05,
      "loss": 0.5397,
      "step": 320
    },
    {
      "epoch": 0.87,
      "grad_norm": 22.361942291259766,
      "learning_rate": 3.5526315789473684e-05,
      "loss": 0.5323,
      "step": 330
    },
    {
      "epoch": 0.89,
      "grad_norm": 42.461910247802734,
      "learning_rate": 3.508771929824561e-05,
      "loss": 0.5028,
      "step": 340
    },
    {
      "epoch": 0.92,
      "grad_norm": 35.93864059448242,
      "learning_rate": 3.4649122807017546e-05,
      "loss": 0.568,
      "step": 350
    },
    {
      "epoch": 0.92,
      "eval_accuracy": 0.8708827404479579,
      "eval_f1_macro": 0.8540867659285116,
      "eval_f1_micro": 0.8708827404479579,
      "eval_loss": 0.42495009303092957,
      "eval_runtime": 19.1783,
      "eval_samples_per_second": 79.152,
      "eval_steps_per_second": 2.503,
      "step": 350
    },
    {
      "epoch": 0.95,
      "grad_norm": 19.8919734954834,
      "learning_rate": 3.421052631578947e-05,
      "loss": 0.3863,
      "step": 360
    },
    {
      "epoch": 0.97,
      "grad_norm": 46.49361801147461,
      "learning_rate": 3.377192982456141e-05,
      "loss": 0.5599,
      "step": 370
    },
    {
      "epoch": 1.0,
      "grad_norm": 31.97792625427246,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.5042,
      "step": 380
    },
    {
      "epoch": 1.03,
      "grad_norm": 30.975046157836914,
      "learning_rate": 3.289473684210527e-05,
      "loss": 0.3024,
      "step": 390
    },
    {
      "epoch": 1.05,
      "grad_norm": 14.051041603088379,
      "learning_rate": 3.24561403508772e-05,
      "loss": 0.2395,
      "step": 400
    },
    {
      "epoch": 1.05,
      "eval_accuracy": 0.8761528326745718,
      "eval_f1_macro": 0.8611335072766768,
      "eval_f1_micro": 0.8761528326745718,
      "eval_loss": 0.4569604992866516,
      "eval_runtime": 19.1561,
      "eval_samples_per_second": 79.244,
      "eval_steps_per_second": 2.506,
      "step": 400
    },
    {
      "epoch": 1.08,
      "grad_norm": 28.197834014892578,
      "learning_rate": 3.2017543859649124e-05,
      "loss": 0.2336,
      "step": 410
    },
    {
      "epoch": 1.11,
      "grad_norm": 17.73228645324707,
      "learning_rate": 3.157894736842105e-05,
      "loss": 0.2069,
      "step": 420
    },
    {
      "epoch": 1.13,
      "grad_norm": 19.042102813720703,
      "learning_rate": 3.1140350877192986e-05,
      "loss": 0.3182,
      "step": 430
    },
    {
      "epoch": 1.16,
      "grad_norm": 17.37108039855957,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 0.2071,
      "step": 440
    },
    {
      "epoch": 1.18,
      "grad_norm": 17.802289962768555,
      "learning_rate": 3.0263157894736844e-05,
      "loss": 0.2213,
      "step": 450
    },
    {
      "epoch": 1.18,
      "eval_accuracy": 0.8774703557312253,
      "eval_f1_macro": 0.8631213514149485,
      "eval_f1_micro": 0.8774703557312253,
      "eval_loss": 0.45242956280708313,
      "eval_runtime": 19.1508,
      "eval_samples_per_second": 79.266,
      "eval_steps_per_second": 2.506,
      "step": 450
    },
    {
      "epoch": 1.21,
      "grad_norm": 18.258426666259766,
      "learning_rate": 2.9824561403508772e-05,
      "loss": 0.2722,
      "step": 460
    },
    {
      "epoch": 1.24,
      "grad_norm": 24.840511322021484,
      "learning_rate": 2.9385964912280706e-05,
      "loss": 0.2721,
      "step": 470
    },
    {
      "epoch": 1.26,
      "grad_norm": 14.744382858276367,
      "learning_rate": 2.8947368421052634e-05,
      "loss": 0.2501,
      "step": 480
    },
    {
      "epoch": 1.29,
      "grad_norm": 18.21392250061035,
      "learning_rate": 2.850877192982456e-05,
      "loss": 0.2516,
      "step": 490
    },
    {
      "epoch": 1.32,
      "grad_norm": 7.454222679138184,
      "learning_rate": 2.8070175438596492e-05,
      "loss": 0.1778,
      "step": 500
    },
    {
      "epoch": 1.32,
      "eval_accuracy": 0.8748353096179183,
      "eval_f1_macro": 0.8507965490074818,
      "eval_f1_micro": 0.8748353096179183,
      "eval_loss": 0.4648575782775879,
      "eval_runtime": 19.1651,
      "eval_samples_per_second": 79.207,
      "eval_steps_per_second": 2.505,
      "step": 500
    },
    {
      "epoch": 1.34,
      "grad_norm": 15.366374969482422,
      "learning_rate": 2.7631578947368426e-05,
      "loss": 0.1821,
      "step": 510
    },
    {
      "epoch": 1.37,
      "grad_norm": 17.506078720092773,
      "learning_rate": 2.7192982456140354e-05,
      "loss": 0.2568,
      "step": 520
    },
    {
      "epoch": 1.39,
      "grad_norm": 32.42897033691406,
      "learning_rate": 2.675438596491228e-05,
      "loss": 0.2519,
      "step": 530
    },
    {
      "epoch": 1.42,
      "grad_norm": 16.54423713684082,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.1991,
      "step": 540
    },
    {
      "epoch": 1.45,
      "grad_norm": 13.094934463500977,
      "learning_rate": 2.5877192982456143e-05,
      "loss": 0.1738,
      "step": 550
    },
    {
      "epoch": 1.45,
      "eval_accuracy": 0.8794466403162056,
      "eval_f1_macro": 0.8616811925870786,
      "eval_f1_micro": 0.8794466403162056,
      "eval_loss": 0.4853415787220001,
      "eval_runtime": 19.1386,
      "eval_samples_per_second": 79.316,
      "eval_steps_per_second": 2.508,
      "step": 550
    },
    {
      "epoch": 1.47,
      "grad_norm": 9.557943344116211,
      "learning_rate": 2.5438596491228074e-05,
      "loss": 0.3554,
      "step": 560
    },
    {
      "epoch": 1.5,
      "grad_norm": 20.867530822753906,
      "learning_rate": 2.5e-05,
      "loss": 0.2558,
      "step": 570
    },
    {
      "epoch": 1.53,
      "grad_norm": 16.902305603027344,
      "learning_rate": 2.456140350877193e-05,
      "loss": 0.2638,
      "step": 580
    },
    {
      "epoch": 1.55,
      "grad_norm": 14.376564979553223,
      "learning_rate": 2.412280701754386e-05,
      "loss": 0.1744,
      "step": 590
    },
    {
      "epoch": 1.58,
      "grad_norm": 21.982099533081055,
      "learning_rate": 2.368421052631579e-05,
      "loss": 0.2643,
      "step": 600
    },
    {
      "epoch": 1.58,
      "eval_accuracy": 0.8827404479578392,
      "eval_f1_macro": 0.8675629871629298,
      "eval_f1_micro": 0.8827404479578392,
      "eval_loss": 0.43024396896362305,
      "eval_runtime": 19.1559,
      "eval_samples_per_second": 79.244,
      "eval_steps_per_second": 2.506,
      "step": 600
    },
    {
      "epoch": 1.61,
      "grad_norm": 8.151387214660645,
      "learning_rate": 2.324561403508772e-05,
      "loss": 0.2679,
      "step": 610
    },
    {
      "epoch": 1.63,
      "grad_norm": 22.84608268737793,
      "learning_rate": 2.280701754385965e-05,
      "loss": 0.2687,
      "step": 620
    },
    {
      "epoch": 1.66,
      "grad_norm": 18.665374755859375,
      "learning_rate": 2.236842105263158e-05,
      "loss": 0.1821,
      "step": 630
    },
    {
      "epoch": 1.68,
      "grad_norm": 16.556060791015625,
      "learning_rate": 2.1929824561403507e-05,
      "loss": 0.2583,
      "step": 640
    },
    {
      "epoch": 1.71,
      "grad_norm": 10.120057106018066,
      "learning_rate": 2.149122807017544e-05,
      "loss": 0.3357,
      "step": 650
    },
    {
      "epoch": 1.71,
      "eval_accuracy": 0.8827404479578392,
      "eval_f1_macro": 0.8673002865339278,
      "eval_f1_micro": 0.8827404479578392,
      "eval_loss": 0.43883296847343445,
      "eval_runtime": 19.1504,
      "eval_samples_per_second": 79.267,
      "eval_steps_per_second": 2.506,
      "step": 650
    },
    {
      "epoch": 1.74,
      "grad_norm": 27.886629104614258,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.2225,
      "step": 660
    },
    {
      "epoch": 1.76,
      "grad_norm": 17.229507446289062,
      "learning_rate": 2.06140350877193e-05,
      "loss": 0.2053,
      "step": 670
    },
    {
      "epoch": 1.79,
      "grad_norm": 33.89767837524414,
      "learning_rate": 2.0175438596491227e-05,
      "loss": 0.223,
      "step": 680
    },
    {
      "epoch": 1.82,
      "grad_norm": 12.045727729797363,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 0.1707,
      "step": 690
    },
    {
      "epoch": 1.84,
      "grad_norm": 19.970233917236328,
      "learning_rate": 1.929824561403509e-05,
      "loss": 0.3029,
      "step": 700
    },
    {
      "epoch": 1.84,
      "eval_accuracy": 0.8827404479578392,
      "eval_f1_macro": 0.8655521627258196,
      "eval_f1_micro": 0.8827404479578392,
      "eval_loss": 0.4430885314941406,
      "eval_runtime": 19.1338,
      "eval_samples_per_second": 79.336,
      "eval_steps_per_second": 2.509,
      "step": 700
    },
    {
      "epoch": 1.87,
      "grad_norm": 16.294740676879883,
      "learning_rate": 1.885964912280702e-05,
      "loss": 0.222,
      "step": 710
    },
    {
      "epoch": 1.89,
      "grad_norm": 12.050216674804688,
      "learning_rate": 1.8421052631578947e-05,
      "loss": 0.2102,
      "step": 720
    },
    {
      "epoch": 1.92,
      "grad_norm": 21.319595336914062,
      "learning_rate": 1.7982456140350878e-05,
      "loss": 0.1974,
      "step": 730
    },
    {
      "epoch": 1.95,
      "grad_norm": 9.788517951965332,
      "learning_rate": 1.7543859649122806e-05,
      "loss": 0.2275,
      "step": 740
    },
    {
      "epoch": 1.97,
      "grad_norm": 19.825088500976562,
      "learning_rate": 1.7105263157894737e-05,
      "loss": 0.1809,
      "step": 750
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.8899868247694335,
      "eval_f1_macro": 0.874229892393047,
      "eval_f1_micro": 0.8899868247694335,
      "eval_loss": 0.42660534381866455,
      "eval_runtime": 19.1466,
      "eval_samples_per_second": 79.283,
      "eval_steps_per_second": 2.507,
      "step": 750
    },
    {
      "epoch": 2.0,
      "grad_norm": 14.426370620727539,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.1963,
      "step": 760
    },
    {
      "epoch": 2.03,
      "grad_norm": 17.27349281311035,
      "learning_rate": 1.62280701754386e-05,
      "loss": 0.1066,
      "step": 770
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.37688946723938,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.064,
      "step": 780
    },
    {
      "epoch": 2.08,
      "grad_norm": 6.233172416687012,
      "learning_rate": 1.5350877192982457e-05,
      "loss": 0.0603,
      "step": 790
    },
    {
      "epoch": 2.11,
      "grad_norm": 17.858177185058594,
      "learning_rate": 1.4912280701754386e-05,
      "loss": 0.0589,
      "step": 800
    },
    {
      "epoch": 2.11,
      "eval_accuracy": 0.8945981554677207,
      "eval_f1_macro": 0.8814951057007789,
      "eval_f1_micro": 0.8945981554677207,
      "eval_loss": 0.4498850703239441,
      "eval_runtime": 19.1446,
      "eval_samples_per_second": 79.291,
      "eval_steps_per_second": 2.507,
      "step": 800
    },
    {
      "epoch": 2.13,
      "grad_norm": 10.511329650878906,
      "learning_rate": 1.4473684210526317e-05,
      "loss": 0.0526,
      "step": 810
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.655291795730591,
      "learning_rate": 1.4035087719298246e-05,
      "loss": 0.0308,
      "step": 820
    },
    {
      "epoch": 2.18,
      "grad_norm": 14.438605308532715,
      "learning_rate": 1.3596491228070177e-05,
      "loss": 0.063,
      "step": 830
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.1180195808410645,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.0297,
      "step": 840
    },
    {
      "epoch": 2.24,
      "grad_norm": 9.131390571594238,
      "learning_rate": 1.2719298245614037e-05,
      "loss": 0.0531,
      "step": 850
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.8919631093544137,
      "eval_f1_macro": 0.8757800595234113,
      "eval_f1_micro": 0.8919631093544137,
      "eval_loss": 0.475754976272583,
      "eval_runtime": 19.1667,
      "eval_samples_per_second": 79.2,
      "eval_steps_per_second": 2.504,
      "step": 850
    },
    {
      "epoch": 2.26,
      "grad_norm": 0.74644935131073,
      "learning_rate": 1.2280701754385964e-05,
      "loss": 0.0649,
      "step": 860
    },
    {
      "epoch": 2.29,
      "grad_norm": 9.349382400512695,
      "learning_rate": 1.1842105263157895e-05,
      "loss": 0.0213,
      "step": 870
    },
    {
      "epoch": 2.32,
      "grad_norm": 11.547332763671875,
      "learning_rate": 1.1403508771929824e-05,
      "loss": 0.0526,
      "step": 880
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.113059043884277,
      "learning_rate": 1.0964912280701754e-05,
      "loss": 0.0187,
      "step": 890
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.626723527908325,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.0234,
      "step": 900
    },
    {
      "epoch": 2.37,
      "eval_accuracy": 0.8952569169960475,
      "eval_f1_macro": 0.8803631688376218,
      "eval_f1_micro": 0.8952569169960475,
      "eval_loss": 0.47881799936294556,
      "eval_runtime": 19.145,
      "eval_samples_per_second": 79.29,
      "eval_steps_per_second": 2.507,
      "step": 900
    },
    {
      "epoch": 2.39,
      "grad_norm": 11.01301383972168,
      "learning_rate": 1.0087719298245614e-05,
      "loss": 0.0655,
      "step": 910
    },
    {
      "epoch": 2.42,
      "grad_norm": 11.407805442810059,
      "learning_rate": 9.649122807017545e-06,
      "loss": 0.027,
      "step": 920
    },
    {
      "epoch": 2.45,
      "grad_norm": 1.768430471420288,
      "learning_rate": 9.210526315789474e-06,
      "loss": 0.0518,
      "step": 930
    },
    {
      "epoch": 2.47,
      "grad_norm": 20.94036102294922,
      "learning_rate": 8.771929824561403e-06,
      "loss": 0.0359,
      "step": 940
    },
    {
      "epoch": 2.5,
      "grad_norm": 1.7965754270553589,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.0145,
      "step": 950
    },
    {
      "epoch": 2.5,
      "eval_accuracy": 0.8939393939393939,
      "eval_f1_macro": 0.877949638322673,
      "eval_f1_micro": 0.8939393939393939,
      "eval_loss": 0.49758803844451904,
      "eval_runtime": 19.1473,
      "eval_samples_per_second": 79.28,
      "eval_steps_per_second": 2.507,
      "step": 950
    },
    {
      "epoch": 2.53,
      "grad_norm": 0.1458648443222046,
      "learning_rate": 7.894736842105263e-06,
      "loss": 0.0425,
      "step": 960
    },
    {
      "epoch": 2.55,
      "grad_norm": 11.682854652404785,
      "learning_rate": 7.456140350877193e-06,
      "loss": 0.0851,
      "step": 970
    },
    {
      "epoch": 2.58,
      "grad_norm": 1.2510184049606323,
      "learning_rate": 7.017543859649123e-06,
      "loss": 0.0299,
      "step": 980
    },
    {
      "epoch": 2.61,
      "grad_norm": 10.715560913085938,
      "learning_rate": 6.578947368421053e-06,
      "loss": 0.0678,
      "step": 990
    },
    {
      "epoch": 2.63,
      "grad_norm": 11.586739540100098,
      "learning_rate": 6.140350877192982e-06,
      "loss": 0.058,
      "step": 1000
    },
    {
      "epoch": 2.63,
      "eval_accuracy": 0.8992094861660079,
      "eval_f1_macro": 0.8816426015432036,
      "eval_f1_micro": 0.8992094861660079,
      "eval_loss": 0.4966810941696167,
      "eval_runtime": 19.17,
      "eval_samples_per_second": 79.186,
      "eval_steps_per_second": 2.504,
      "step": 1000
    },
    {
      "epoch": 2.66,
      "grad_norm": 1.5098384618759155,
      "learning_rate": 5.701754385964912e-06,
      "loss": 0.0287,
      "step": 1010
    },
    {
      "epoch": 2.68,
      "grad_norm": 6.79250955581665,
      "learning_rate": 5.263157894736842e-06,
      "loss": 0.0409,
      "step": 1020
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9859375953674316,
      "learning_rate": 4.824561403508772e-06,
      "loss": 0.0379,
      "step": 1030
    },
    {
      "epoch": 2.74,
      "grad_norm": 14.098068237304688,
      "learning_rate": 4.3859649122807014e-06,
      "loss": 0.046,
      "step": 1040
    },
    {
      "epoch": 2.76,
      "grad_norm": 10.598516464233398,
      "learning_rate": 3.9473684210526315e-06,
      "loss": 0.05,
      "step": 1050
    },
    {
      "epoch": 2.76,
      "eval_accuracy": 0.8932806324110671,
      "eval_f1_macro": 0.8752583058969902,
      "eval_f1_micro": 0.8932806324110671,
      "eval_loss": 0.5112892389297485,
      "eval_runtime": 19.1422,
      "eval_samples_per_second": 79.301,
      "eval_steps_per_second": 2.508,
      "step": 1050
    },
    {
      "epoch": 2.79,
      "grad_norm": 0.23401279747486115,
      "learning_rate": 3.5087719298245615e-06,
      "loss": 0.0655,
      "step": 1060
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.760624885559082,
      "learning_rate": 3.070175438596491e-06,
      "loss": 0.0659,
      "step": 1070
    },
    {
      "epoch": 2.84,
      "grad_norm": 1.6583623886108398,
      "learning_rate": 2.631578947368421e-06,
      "loss": 0.0429,
      "step": 1080
    },
    {
      "epoch": 2.87,
      "grad_norm": 0.6040318608283997,
      "learning_rate": 2.1929824561403507e-06,
      "loss": 0.0168,
      "step": 1090
    },
    {
      "epoch": 2.89,
      "grad_norm": 11.433518409729004,
      "learning_rate": 1.7543859649122807e-06,
      "loss": 0.0556,
      "step": 1100
    },
    {
      "epoch": 2.89,
      "eval_accuracy": 0.8965744400527009,
      "eval_f1_macro": 0.8803498700160407,
      "eval_f1_micro": 0.8965744400527009,
      "eval_loss": 0.502357542514801,
      "eval_runtime": 19.1404,
      "eval_samples_per_second": 79.309,
      "eval_steps_per_second": 2.508,
      "step": 1100
    },
    {
      "epoch": 2.92,
      "grad_norm": 17.50218963623047,
      "learning_rate": 1.3157894736842106e-06,
      "loss": 0.0795,
      "step": 1110
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.289309501647949,
      "learning_rate": 8.771929824561404e-07,
      "loss": 0.0646,
      "step": 1120
    },
    {
      "epoch": 2.97,
      "grad_norm": 16.876842498779297,
      "learning_rate": 4.385964912280702e-07,
      "loss": 0.0431,
      "step": 1130
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.3004974126815796,
      "learning_rate": 0.0,
      "loss": 0.0419,
      "step": 1140
    },
    {
      "epoch": 3.0,
      "step": 1140,
      "total_flos": 1.9041981890455142e+17,
      "train_loss": 0.48372833394167714,
      "train_runtime": 1860.5106,
      "train_samples_per_second": 19.582,
      "train_steps_per_second": 0.613
    }
  ],
  "logging_steps": 10,
  "max_steps": 1140,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "total_flos": 1.9041981890455142e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}