|
{ |
|
"best_metric": 0.5038631558418274, |
|
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/Qwen/Qwen1.5_1.8B_twitter/checkpoint-250", |
|
"epoch": 3.0, |
|
"eval_steps": 50, |
|
"global_step": 816, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 109.95573425292969, |
|
"learning_rate": 4.938725490196079e-06, |
|
"loss": 1.1177, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 117.97087860107422, |
|
"learning_rate": 4.8774509803921576e-06, |
|
"loss": 0.8448, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 46.05353546142578, |
|
"learning_rate": 4.816176470588236e-06, |
|
"loss": 0.8301, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 41.749080657958984, |
|
"learning_rate": 4.754901960784314e-06, |
|
"loss": 0.7069, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 26.182971954345703, |
|
"learning_rate": 4.693627450980393e-06, |
|
"loss": 0.6585, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.7123161764705882, |
|
"eval_f1_macro": 0.5810560943233977, |
|
"eval_f1_micro": 0.7123161764705882, |
|
"eval_loss": 0.6434972286224365, |
|
"eval_runtime": 2.8155, |
|
"eval_samples_per_second": 386.429, |
|
"eval_steps_per_second": 12.076, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 118.7502212524414, |
|
"learning_rate": 4.632352941176471e-06, |
|
"loss": 0.6179, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 34.24118423461914, |
|
"learning_rate": 4.571078431372549e-06, |
|
"loss": 0.6324, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 92.65385437011719, |
|
"learning_rate": 4.509803921568628e-06, |
|
"loss": 0.5743, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 44.45353317260742, |
|
"learning_rate": 4.448529411764706e-06, |
|
"loss": 0.4798, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 35.91661071777344, |
|
"learning_rate": 4.3872549019607845e-06, |
|
"loss": 0.6396, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.7297794117647058, |
|
"eval_f1_macro": 0.6997657235537935, |
|
"eval_f1_micro": 0.7297794117647058, |
|
"eval_loss": 0.6015912294387817, |
|
"eval_runtime": 2.832, |
|
"eval_samples_per_second": 384.18, |
|
"eval_steps_per_second": 12.006, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 56.78458786010742, |
|
"learning_rate": 4.3259803921568635e-06, |
|
"loss": 0.5814, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 53.924827575683594, |
|
"learning_rate": 4.264705882352942e-06, |
|
"loss": 0.5131, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 74.93573760986328, |
|
"learning_rate": 4.20343137254902e-06, |
|
"loss": 0.5242, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 26.097169876098633, |
|
"learning_rate": 4.142156862745099e-06, |
|
"loss": 0.5529, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 78.16165161132812, |
|
"learning_rate": 4.080882352941177e-06, |
|
"loss": 0.5108, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.7527573529411765, |
|
"eval_f1_macro": 0.6963299829940972, |
|
"eval_f1_micro": 0.7527573529411765, |
|
"eval_loss": 0.5226907134056091, |
|
"eval_runtime": 2.826, |
|
"eval_samples_per_second": 384.993, |
|
"eval_steps_per_second": 12.031, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 19.04606056213379, |
|
"learning_rate": 4.019607843137255e-06, |
|
"loss": 0.5012, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 127.43460845947266, |
|
"learning_rate": 3.958333333333333e-06, |
|
"loss": 0.5601, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 73.45539093017578, |
|
"learning_rate": 3.897058823529412e-06, |
|
"loss": 0.6134, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 59.42979049682617, |
|
"learning_rate": 3.8357843137254904e-06, |
|
"loss": 0.5447, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 22.45537757873535, |
|
"learning_rate": 3.774509803921569e-06, |
|
"loss": 0.5065, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7417279411764706, |
|
"eval_f1_macro": 0.6346563132227484, |
|
"eval_f1_micro": 0.7417279411764706, |
|
"eval_loss": 0.5502642393112183, |
|
"eval_runtime": 2.8852, |
|
"eval_samples_per_second": 377.094, |
|
"eval_steps_per_second": 11.784, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 50.938880920410156, |
|
"learning_rate": 3.7132352941176476e-06, |
|
"loss": 0.4589, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 84.68132781982422, |
|
"learning_rate": 3.6519607843137257e-06, |
|
"loss": 0.5403, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 27.052024841308594, |
|
"learning_rate": 3.5906862745098043e-06, |
|
"loss": 0.4618, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 23.497787475585938, |
|
"learning_rate": 3.529411764705883e-06, |
|
"loss": 0.452, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 15.185086250305176, |
|
"learning_rate": 3.468137254901961e-06, |
|
"loss": 0.4883, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.7775735294117647, |
|
"eval_f1_macro": 0.7420002194942226, |
|
"eval_f1_micro": 0.7775735294117647, |
|
"eval_loss": 0.5038631558418274, |
|
"eval_runtime": 2.8371, |
|
"eval_samples_per_second": 383.494, |
|
"eval_steps_per_second": 11.984, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 24.239835739135742, |
|
"learning_rate": 3.4068627450980396e-06, |
|
"loss": 0.4325, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 13.703453063964844, |
|
"learning_rate": 3.3455882352941178e-06, |
|
"loss": 0.5193, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 92.7187728881836, |
|
"learning_rate": 3.2843137254901964e-06, |
|
"loss": 0.4147, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 38.43824768066406, |
|
"learning_rate": 3.223039215686275e-06, |
|
"loss": 0.3275, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 11.464463233947754, |
|
"learning_rate": 3.161764705882353e-06, |
|
"loss": 0.3296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.7729779411764706, |
|
"eval_f1_macro": 0.730720863982653, |
|
"eval_f1_micro": 0.7729779411764706, |
|
"eval_loss": 0.5249598026275635, |
|
"eval_runtime": 2.8316, |
|
"eval_samples_per_second": 384.236, |
|
"eval_steps_per_second": 12.007, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 18.103742599487305, |
|
"learning_rate": 3.1004901960784317e-06, |
|
"loss": 0.32, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 48.70182800292969, |
|
"learning_rate": 3.03921568627451e-06, |
|
"loss": 0.3033, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 19.211103439331055, |
|
"learning_rate": 2.9779411764705884e-06, |
|
"loss": 0.371, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 30.80494499206543, |
|
"learning_rate": 2.916666666666667e-06, |
|
"loss": 0.3158, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 22.932558059692383, |
|
"learning_rate": 2.855392156862745e-06, |
|
"loss": 0.322, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7720588235294118, |
|
"eval_f1_macro": 0.7422500391653388, |
|
"eval_f1_micro": 0.7720588235294118, |
|
"eval_loss": 0.5509535670280457, |
|
"eval_runtime": 2.8424, |
|
"eval_samples_per_second": 382.778, |
|
"eval_steps_per_second": 11.962, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 12.943411827087402, |
|
"learning_rate": 2.7941176470588237e-06, |
|
"loss": 0.3103, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 35.10004425048828, |
|
"learning_rate": 2.732843137254902e-06, |
|
"loss": 0.2967, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 21.274372100830078, |
|
"learning_rate": 2.6715686274509804e-06, |
|
"loss": 0.3739, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 33.91293716430664, |
|
"learning_rate": 2.610294117647059e-06, |
|
"loss": 0.3583, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 33.361366271972656, |
|
"learning_rate": 2.549019607843137e-06, |
|
"loss": 0.3287, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.7582720588235294, |
|
"eval_f1_macro": 0.6932231064719864, |
|
"eval_f1_micro": 0.7582720588235294, |
|
"eval_loss": 0.539191722869873, |
|
"eval_runtime": 2.8418, |
|
"eval_samples_per_second": 382.862, |
|
"eval_steps_per_second": 11.964, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 41.59482192993164, |
|
"learning_rate": 2.4877450980392158e-06, |
|
"loss": 0.2896, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 57.865882873535156, |
|
"learning_rate": 2.4264705882352943e-06, |
|
"loss": 0.3092, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 37.957183837890625, |
|
"learning_rate": 2.3651960784313725e-06, |
|
"loss": 0.3175, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 30.82972526550293, |
|
"learning_rate": 2.303921568627451e-06, |
|
"loss": 0.3005, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 34.46055603027344, |
|
"learning_rate": 2.2426470588235296e-06, |
|
"loss": 0.3097, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_accuracy": 0.7628676470588235, |
|
"eval_f1_macro": 0.7222644376899696, |
|
"eval_f1_micro": 0.7628676470588235, |
|
"eval_loss": 0.5631462335586548, |
|
"eval_runtime": 2.8377, |
|
"eval_samples_per_second": 383.405, |
|
"eval_steps_per_second": 11.981, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 25.395221710205078, |
|
"learning_rate": 2.1813725490196082e-06, |
|
"loss": 0.3284, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 13.454970359802246, |
|
"learning_rate": 2.1200980392156864e-06, |
|
"loss": 0.3399, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 14.682934761047363, |
|
"learning_rate": 2.058823529411765e-06, |
|
"loss": 0.2575, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 8.564711570739746, |
|
"learning_rate": 1.9975490196078435e-06, |
|
"loss": 0.3048, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 30.56090545654297, |
|
"learning_rate": 1.9362745098039217e-06, |
|
"loss": 0.3397, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.7674632352941176, |
|
"eval_f1_macro": 0.7333806007808814, |
|
"eval_f1_micro": 0.7674632352941176, |
|
"eval_loss": 0.5669376254081726, |
|
"eval_runtime": 2.8353, |
|
"eval_samples_per_second": 383.737, |
|
"eval_steps_per_second": 11.992, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 50.637630462646484, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"loss": 0.3389, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 69.52845001220703, |
|
"learning_rate": 1.8137254901960786e-06, |
|
"loss": 0.365, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 19.709930419921875, |
|
"learning_rate": 1.752450980392157e-06, |
|
"loss": 0.3561, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 22.103239059448242, |
|
"learning_rate": 1.6911764705882356e-06, |
|
"loss": 0.3147, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 18.630016326904297, |
|
"learning_rate": 1.629901960784314e-06, |
|
"loss": 0.2618, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.75, |
|
"eval_f1_macro": 0.6870389170896785, |
|
"eval_f1_micro": 0.75, |
|
"eval_loss": 0.5891400575637817, |
|
"eval_runtime": 3.0028, |
|
"eval_samples_per_second": 362.323, |
|
"eval_steps_per_second": 11.323, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 20.225698471069336, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"loss": 0.2155, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 20.72734832763672, |
|
"learning_rate": 1.5073529411764707e-06, |
|
"loss": 0.1837, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 12.758101463317871, |
|
"learning_rate": 1.4460784313725492e-06, |
|
"loss": 0.1936, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 20.31243896484375, |
|
"learning_rate": 1.3848039215686276e-06, |
|
"loss": 0.1668, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 6.187927722930908, |
|
"learning_rate": 1.323529411764706e-06, |
|
"loss": 0.1745, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.7582720588235294, |
|
"eval_f1_macro": 0.7122790012056079, |
|
"eval_f1_micro": 0.7582720588235294, |
|
"eval_loss": 0.6399500370025635, |
|
"eval_runtime": 2.8351, |
|
"eval_samples_per_second": 383.754, |
|
"eval_steps_per_second": 11.992, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 16.426952362060547, |
|
"learning_rate": 1.2622549019607843e-06, |
|
"loss": 0.1873, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 14.441506385803223, |
|
"learning_rate": 1.200980392156863e-06, |
|
"loss": 0.1318, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 25.300006866455078, |
|
"learning_rate": 1.1397058823529413e-06, |
|
"loss": 0.1468, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 5.833555221557617, |
|
"learning_rate": 1.0784313725490197e-06, |
|
"loss": 0.1969, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 14.262299537658691, |
|
"learning_rate": 1.017156862745098e-06, |
|
"loss": 0.1572, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_accuracy": 0.7518382352941176, |
|
"eval_f1_macro": 0.6966693100713719, |
|
"eval_f1_micro": 0.7518382352941176, |
|
"eval_loss": 0.6694048643112183, |
|
"eval_runtime": 2.8384, |
|
"eval_samples_per_second": 383.315, |
|
"eval_steps_per_second": 11.979, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 9.928728103637695, |
|
"learning_rate": 9.558823529411764e-07, |
|
"loss": 0.1654, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 36.1452522277832, |
|
"learning_rate": 8.94607843137255e-07, |
|
"loss": 0.181, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 14.285146713256836, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.1702, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 22.802335739135742, |
|
"learning_rate": 7.720588235294119e-07, |
|
"loss": 0.1375, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 25.91615867614746, |
|
"learning_rate": 7.107843137254903e-07, |
|
"loss": 0.1804, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.7610294117647058, |
|
"eval_f1_macro": 0.7172783241384825, |
|
"eval_f1_micro": 0.7610294117647058, |
|
"eval_loss": 0.6869542598724365, |
|
"eval_runtime": 2.8367, |
|
"eval_samples_per_second": 383.549, |
|
"eval_steps_per_second": 11.986, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 21.840639114379883, |
|
"learning_rate": 6.495098039215687e-07, |
|
"loss": 0.1811, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 12.820947647094727, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 0.1636, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 14.430222511291504, |
|
"learning_rate": 5.269607843137256e-07, |
|
"loss": 0.1478, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 15.748844146728516, |
|
"learning_rate": 4.6568627450980395e-07, |
|
"loss": 0.1738, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 21.588491439819336, |
|
"learning_rate": 4.044117647058824e-07, |
|
"loss": 0.1817, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.7536764705882353, |
|
"eval_f1_macro": 0.7044581029142274, |
|
"eval_f1_micro": 0.7536764705882353, |
|
"eval_loss": 0.6656494140625, |
|
"eval_runtime": 2.8877, |
|
"eval_samples_per_second": 376.77, |
|
"eval_steps_per_second": 11.774, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 34.946441650390625, |
|
"learning_rate": 3.4313725490196084e-07, |
|
"loss": 0.1558, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 14.45042610168457, |
|
"learning_rate": 2.8186274509803926e-07, |
|
"loss": 0.1671, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 22.017681121826172, |
|
"learning_rate": 2.2058823529411768e-07, |
|
"loss": 0.16, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 40.37516403198242, |
|
"learning_rate": 1.5931372549019607e-07, |
|
"loss": 0.2156, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 21.41912841796875, |
|
"learning_rate": 9.803921568627452e-08, |
|
"loss": 0.1984, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.7518382352941176, |
|
"eval_f1_macro": 0.6949038473522191, |
|
"eval_f1_micro": 0.7518382352941176, |
|
"eval_loss": 0.6783303618431091, |
|
"eval_runtime": 2.8354, |
|
"eval_samples_per_second": 383.725, |
|
"eval_steps_per_second": 11.991, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 22.50458335876465, |
|
"learning_rate": 3.6764705882352945e-08, |
|
"loss": 0.1742, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 816, |
|
"total_flos": 2.435566082929459e+16, |
|
"train_loss": 0.3625207788803998, |
|
"train_runtime": 885.2091, |
|
"train_samples_per_second": 29.485, |
|
"train_steps_per_second": 0.922 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 816, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"total_flos": 2.435566082929459e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|