|
{ |
|
"best_metric": 0.4901912808418274, |
|
"best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/twitter_disaster/checkpoint-250", |
|
"epoch": 3.0, |
|
"eval_steps": 50, |
|
"global_step": 816, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 24.977563858032227, |
|
"learning_rate": 4.9387254901960786e-05, |
|
"loss": 1.7697, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 142.99278259277344, |
|
"learning_rate": 4.877450980392157e-05, |
|
"loss": 1.666, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 87.81151580810547, |
|
"learning_rate": 4.816176470588236e-05, |
|
"loss": 1.0546, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 50.04526901245117, |
|
"learning_rate": 4.7549019607843135e-05, |
|
"loss": 0.813, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 22.704946517944336, |
|
"learning_rate": 4.6936274509803925e-05, |
|
"loss": 0.8422, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.7178308823529411, |
|
"eval_f1_macro": 0.6371659017461913, |
|
"eval_f1_micro": 0.7178308823529411, |
|
"eval_loss": 0.6453067660331726, |
|
"eval_runtime": 13.5072, |
|
"eval_samples_per_second": 80.55, |
|
"eval_steps_per_second": 2.517, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 20.923370361328125, |
|
"learning_rate": 4.632352941176471e-05, |
|
"loss": 0.5966, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 66.72351837158203, |
|
"learning_rate": 4.571078431372549e-05, |
|
"loss": 0.7947, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 157.05340576171875, |
|
"learning_rate": 4.5098039215686275e-05, |
|
"loss": 0.8614, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 68.06228637695312, |
|
"learning_rate": 4.448529411764706e-05, |
|
"loss": 0.6034, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 15.797639846801758, |
|
"learning_rate": 4.387254901960784e-05, |
|
"loss": 0.6082, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.7472426470588235, |
|
"eval_f1_macro": 0.7123361820896584, |
|
"eval_f1_micro": 0.7472426470588235, |
|
"eval_loss": 0.5489143133163452, |
|
"eval_runtime": 13.5318, |
|
"eval_samples_per_second": 80.403, |
|
"eval_steps_per_second": 2.513, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 45.799476623535156, |
|
"learning_rate": 4.325980392156863e-05, |
|
"loss": 0.5511, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 49.33269500732422, |
|
"learning_rate": 4.2647058823529415e-05, |
|
"loss": 0.4801, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 36.33636474609375, |
|
"learning_rate": 4.20343137254902e-05, |
|
"loss": 0.4603, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 82.08908081054688, |
|
"learning_rate": 4.142156862745099e-05, |
|
"loss": 0.5596, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 13.81618595123291, |
|
"learning_rate": 4.0808823529411765e-05, |
|
"loss": 0.4305, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.7251838235294118, |
|
"eval_f1_macro": 0.5776786815440837, |
|
"eval_f1_micro": 0.7251838235294118, |
|
"eval_loss": 0.5571720004081726, |
|
"eval_runtime": 13.5624, |
|
"eval_samples_per_second": 80.222, |
|
"eval_steps_per_second": 2.507, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 57.01852035522461, |
|
"learning_rate": 4.0196078431372555e-05, |
|
"loss": 0.5056, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 53.905147552490234, |
|
"learning_rate": 3.958333333333333e-05, |
|
"loss": 0.567, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 47.106292724609375, |
|
"learning_rate": 3.897058823529412e-05, |
|
"loss": 0.5069, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 8.995195388793945, |
|
"learning_rate": 3.8357843137254904e-05, |
|
"loss": 0.5449, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 79.07156372070312, |
|
"learning_rate": 3.774509803921569e-05, |
|
"loss": 0.5021, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7720588235294118, |
|
"eval_f1_macro": 0.7436838605490643, |
|
"eval_f1_micro": 0.7720588235294118, |
|
"eval_loss": 0.49997127056121826, |
|
"eval_runtime": 13.6431, |
|
"eval_samples_per_second": 79.747, |
|
"eval_steps_per_second": 2.492, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 15.758883476257324, |
|
"learning_rate": 3.713235294117647e-05, |
|
"loss": 0.5018, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 183.47061157226562, |
|
"learning_rate": 3.6519607843137254e-05, |
|
"loss": 0.616, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 48.367374420166016, |
|
"learning_rate": 3.5906862745098044e-05, |
|
"loss": 0.4927, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 5.8350114822387695, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.4508, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 15.554094314575195, |
|
"learning_rate": 3.468137254901961e-05, |
|
"loss": 0.4715, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.7766544117647058, |
|
"eval_f1_macro": 0.7450627015924902, |
|
"eval_f1_micro": 0.7766544117647058, |
|
"eval_loss": 0.4901912808418274, |
|
"eval_runtime": 13.5595, |
|
"eval_samples_per_second": 80.239, |
|
"eval_steps_per_second": 2.507, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 37.8280029296875, |
|
"learning_rate": 3.4068627450980394e-05, |
|
"loss": 0.4188, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 20.624736785888672, |
|
"learning_rate": 3.345588235294118e-05, |
|
"loss": 0.5049, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 60.454341888427734, |
|
"learning_rate": 3.284313725490196e-05, |
|
"loss": 0.4536, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 53.48725509643555, |
|
"learning_rate": 3.223039215686275e-05, |
|
"loss": 0.4097, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 18.42328453063965, |
|
"learning_rate": 3.161764705882353e-05, |
|
"loss": 0.3937, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.7601102941176471, |
|
"eval_f1_macro": 0.7018342410563818, |
|
"eval_f1_micro": 0.7601102941176471, |
|
"eval_loss": 0.5194450616836548, |
|
"eval_runtime": 13.566, |
|
"eval_samples_per_second": 80.2, |
|
"eval_steps_per_second": 2.506, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 42.46508026123047, |
|
"learning_rate": 3.100490196078432e-05, |
|
"loss": 0.4045, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 65.97752380371094, |
|
"learning_rate": 3.0392156862745097e-05, |
|
"loss": 0.3829, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 4.754347801208496, |
|
"learning_rate": 2.9779411764705883e-05, |
|
"loss": 0.4535, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 86.20097351074219, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.4082, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 17.790315628051758, |
|
"learning_rate": 2.855392156862745e-05, |
|
"loss": 0.4219, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7665441176470589, |
|
"eval_f1_macro": 0.7228498074454428, |
|
"eval_f1_micro": 0.7665441176470589, |
|
"eval_loss": 0.5227769017219543, |
|
"eval_runtime": 13.5702, |
|
"eval_samples_per_second": 80.176, |
|
"eval_steps_per_second": 2.505, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 47.08971405029297, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 0.4235, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 13.4403715133667, |
|
"learning_rate": 2.732843137254902e-05, |
|
"loss": 0.3631, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 50.05192184448242, |
|
"learning_rate": 2.6715686274509806e-05, |
|
"loss": 0.5085, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 97.1346206665039, |
|
"learning_rate": 2.6102941176470593e-05, |
|
"loss": 0.4432, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 52.068641662597656, |
|
"learning_rate": 2.5490196078431373e-05, |
|
"loss": 0.4315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.7555147058823529, |
|
"eval_f1_macro": 0.6900563751949143, |
|
"eval_f1_micro": 0.7555147058823529, |
|
"eval_loss": 0.5791015625, |
|
"eval_runtime": 13.6465, |
|
"eval_samples_per_second": 79.728, |
|
"eval_steps_per_second": 2.491, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 4.731142997741699, |
|
"learning_rate": 2.487745098039216e-05, |
|
"loss": 0.4396, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 22.810226440429688, |
|
"learning_rate": 2.4264705882352942e-05, |
|
"loss": 0.4104, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 14.011224746704102, |
|
"learning_rate": 2.3651960784313726e-05, |
|
"loss": 0.3847, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 27.048315048217773, |
|
"learning_rate": 2.303921568627451e-05, |
|
"loss": 0.3681, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 6.750571250915527, |
|
"learning_rate": 2.2426470588235296e-05, |
|
"loss": 0.4134, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_accuracy": 0.7389705882352942, |
|
"eval_f1_macro": 0.719594754017431, |
|
"eval_f1_micro": 0.7389705882352942, |
|
"eval_loss": 0.6182358860969543, |
|
"eval_runtime": 13.5558, |
|
"eval_samples_per_second": 80.261, |
|
"eval_steps_per_second": 2.508, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 51.78306579589844, |
|
"learning_rate": 2.181372549019608e-05, |
|
"loss": 0.4691, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 56.603797912597656, |
|
"learning_rate": 2.1200980392156862e-05, |
|
"loss": 0.4194, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 24.38219451904297, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.3631, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 44.108612060546875, |
|
"learning_rate": 1.9975490196078432e-05, |
|
"loss": 0.3859, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 68.38048553466797, |
|
"learning_rate": 1.936274509803922e-05, |
|
"loss": 0.4173, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.7637867647058824, |
|
"eval_f1_macro": 0.7115502256608639, |
|
"eval_f1_micro": 0.7637867647058824, |
|
"eval_loss": 0.5453814268112183, |
|
"eval_runtime": 13.5612, |
|
"eval_samples_per_second": 80.229, |
|
"eval_steps_per_second": 2.507, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 18.69806480407715, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.4323, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 15.339709281921387, |
|
"learning_rate": 1.8137254901960785e-05, |
|
"loss": 0.4529, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 85.60708618164062, |
|
"learning_rate": 1.7524509803921568e-05, |
|
"loss": 0.4462, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 42.06242752075195, |
|
"learning_rate": 1.6911764705882355e-05, |
|
"loss": 0.3753, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 43.271724700927734, |
|
"learning_rate": 1.6299019607843138e-05, |
|
"loss": 0.3278, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.7720588235294118, |
|
"eval_f1_macro": 0.7219169329073483, |
|
"eval_f1_micro": 0.7720588235294118, |
|
"eval_loss": 0.5476648807525635, |
|
"eval_runtime": 13.5763, |
|
"eval_samples_per_second": 80.14, |
|
"eval_steps_per_second": 2.504, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 63.24125289916992, |
|
"learning_rate": 1.568627450980392e-05, |
|
"loss": 0.3151, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 18.521787643432617, |
|
"learning_rate": 1.5073529411764706e-05, |
|
"loss": 0.2909, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 21.641969680786133, |
|
"learning_rate": 1.4460784313725493e-05, |
|
"loss": 0.2893, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 26.12430763244629, |
|
"learning_rate": 1.3848039215686276e-05, |
|
"loss": 0.2642, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 5.281397819519043, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.2641, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.7527573529411765, |
|
"eval_f1_macro": 0.7152217678178568, |
|
"eval_f1_micro": 0.7527573529411765, |
|
"eval_loss": 0.6011173129081726, |
|
"eval_runtime": 13.6441, |
|
"eval_samples_per_second": 79.741, |
|
"eval_steps_per_second": 2.492, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 32.14168167114258, |
|
"learning_rate": 1.2622549019607843e-05, |
|
"loss": 0.2578, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 8.231173515319824, |
|
"learning_rate": 1.200980392156863e-05, |
|
"loss": 0.2012, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 46.39328384399414, |
|
"learning_rate": 1.1397058823529412e-05, |
|
"loss": 0.2582, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 52.687957763671875, |
|
"learning_rate": 1.0784313725490197e-05, |
|
"loss": 0.289, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 22.3091983795166, |
|
"learning_rate": 1.017156862745098e-05, |
|
"loss": 0.2256, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_accuracy": 0.7601102941176471, |
|
"eval_f1_macro": 0.6962074067417461, |
|
"eval_f1_micro": 0.7601102941176471, |
|
"eval_loss": 0.6484518647193909, |
|
"eval_runtime": 13.5588, |
|
"eval_samples_per_second": 80.243, |
|
"eval_steps_per_second": 2.508, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 16.085580825805664, |
|
"learning_rate": 9.558823529411764e-06, |
|
"loss": 0.2501, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 20.8741455078125, |
|
"learning_rate": 8.946078431372549e-06, |
|
"loss": 0.3018, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 25.310302734375, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.2451, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 17.985490798950195, |
|
"learning_rate": 7.720588235294119e-06, |
|
"loss": 0.232, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 7.154005527496338, |
|
"learning_rate": 7.107843137254902e-06, |
|
"loss": 0.2544, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.7628676470588235, |
|
"eval_f1_macro": 0.7165018421562924, |
|
"eval_f1_micro": 0.7628676470588235, |
|
"eval_loss": 0.6459027528762817, |
|
"eval_runtime": 13.5807, |
|
"eval_samples_per_second": 80.114, |
|
"eval_steps_per_second": 2.504, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 9.307502746582031, |
|
"learning_rate": 6.495098039215687e-06, |
|
"loss": 0.3067, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 9.009349822998047, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.2225, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 14.490361213684082, |
|
"learning_rate": 5.269607843137255e-06, |
|
"loss": 0.2576, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 24.25351333618164, |
|
"learning_rate": 4.65686274509804e-06, |
|
"loss": 0.2805, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 14.948960304260254, |
|
"learning_rate": 4.044117647058824e-06, |
|
"loss": 0.2839, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.765625, |
|
"eval_f1_macro": 0.7252674888969208, |
|
"eval_f1_micro": 0.765625, |
|
"eval_loss": 0.5921774506568909, |
|
"eval_runtime": 13.5697, |
|
"eval_samples_per_second": 80.178, |
|
"eval_steps_per_second": 2.506, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 43.209163665771484, |
|
"learning_rate": 3.431372549019608e-06, |
|
"loss": 0.2235, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 7.3651604652404785, |
|
"learning_rate": 2.818627450980392e-06, |
|
"loss": 0.2487, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 21.250938415527344, |
|
"learning_rate": 2.2058823529411767e-06, |
|
"loss": 0.2038, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 21.40656852722168, |
|
"learning_rate": 1.5931372549019608e-06, |
|
"loss": 0.27, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 32.97100830078125, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 0.2634, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.7637867647058824, |
|
"eval_f1_macro": 0.7075804081718023, |
|
"eval_f1_micro": 0.7637867647058824, |
|
"eval_loss": 0.6311897039413452, |
|
"eval_runtime": 13.5748, |
|
"eval_samples_per_second": 80.148, |
|
"eval_steps_per_second": 2.505, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 8.623468399047852, |
|
"learning_rate": 3.6764705882352943e-07, |
|
"loss": 0.2572, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 816, |
|
"total_flos": 1.3629706293215232e+17, |
|
"train_loss": 0.4493609409706265, |
|
"train_runtime": 1238.3624, |
|
"train_samples_per_second": 21.076, |
|
"train_steps_per_second": 0.659 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 816, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"total_flos": 1.3629706293215232e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|