|
{ |
|
"best_metric": 0.15021921694278717, |
|
"best_model_checkpoint": "ViT_Flower102_2/checkpoint-1600", |
|
"epoch": 10.0, |
|
"eval_steps": 100, |
|
"global_step": 4490, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0026773272547870874, |
|
"learning_rate": 0.00019955456570155904, |
|
"loss": 0.0009, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.005304301157593727, |
|
"learning_rate": 0.00019910913140311804, |
|
"loss": 0.0009, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 20.306926727294922, |
|
"learning_rate": 0.00019866369710467706, |
|
"loss": 0.0194, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.321718692779541, |
|
"learning_rate": 0.0001982182628062361, |
|
"loss": 0.1544, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.021045241504907608, |
|
"learning_rate": 0.00019777282850779511, |
|
"loss": 0.0701, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.02391223795711994, |
|
"learning_rate": 0.00019732739420935414, |
|
"loss": 0.1303, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.20434625446796417, |
|
"learning_rate": 0.00019688195991091317, |
|
"loss": 0.1413, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.2549870014190674, |
|
"learning_rate": 0.00019643652561247217, |
|
"loss": 0.1047, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.997387886047363, |
|
"learning_rate": 0.0001959910913140312, |
|
"loss": 0.1234, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.206876277923584, |
|
"learning_rate": 0.0001955456570155902, |
|
"loss": 0.053, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9235294117647059, |
|
"eval_f1": 0.9235294117647059, |
|
"eval_loss": 0.3198450803756714, |
|
"eval_precision": 0.9235294117647059, |
|
"eval_recall": 0.9235294117647059, |
|
"eval_runtime": 17.2015, |
|
"eval_samples_per_second": 59.297, |
|
"eval_steps_per_second": 7.441, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 6.388282299041748, |
|
"learning_rate": 0.00019510022271714922, |
|
"loss": 0.1377, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.129618167877197, |
|
"learning_rate": 0.00019465478841870825, |
|
"loss": 0.0561, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 8.198286056518555, |
|
"learning_rate": 0.00019420935412026727, |
|
"loss": 0.0417, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 7.592113494873047, |
|
"learning_rate": 0.0001937639198218263, |
|
"loss": 0.0388, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.7412798404693604, |
|
"learning_rate": 0.00019331848552338533, |
|
"loss": 0.0921, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.12602120637893677, |
|
"learning_rate": 0.00019287305122494432, |
|
"loss": 0.1905, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 5.073046684265137, |
|
"learning_rate": 0.00019242761692650335, |
|
"loss": 0.1331, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.5400639176368713, |
|
"learning_rate": 0.00019198218262806238, |
|
"loss": 0.0315, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 11.878341674804688, |
|
"learning_rate": 0.00019153674832962138, |
|
"loss": 0.2794, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.48547011613845825, |
|
"learning_rate": 0.0001910913140311804, |
|
"loss": 0.1225, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_f1": 0.9166666666666666, |
|
"eval_loss": 0.40865278244018555, |
|
"eval_precision": 0.9166666666666666, |
|
"eval_recall": 0.9166666666666666, |
|
"eval_runtime": 17.0881, |
|
"eval_samples_per_second": 59.691, |
|
"eval_steps_per_second": 7.491, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.015034608542919159, |
|
"learning_rate": 0.00019064587973273943, |
|
"loss": 0.0953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.665817141532898, |
|
"learning_rate": 0.00019020044543429846, |
|
"loss": 0.081, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.737998127937317, |
|
"learning_rate": 0.00018975501113585748, |
|
"loss": 0.037, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.08088938146829605, |
|
"learning_rate": 0.00018930957683741648, |
|
"loss": 0.2113, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.148426055908203, |
|
"learning_rate": 0.0001888641425389755, |
|
"loss": 0.0674, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.07585727423429489, |
|
"learning_rate": 0.00018841870824053454, |
|
"loss": 0.1698, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.297791481018066, |
|
"learning_rate": 0.00018797327394209353, |
|
"loss": 0.1087, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.039582639932632446, |
|
"learning_rate": 0.00018752783964365256, |
|
"loss": 0.082, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.647770881652832, |
|
"learning_rate": 0.0001870824053452116, |
|
"loss": 0.0256, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.05055054649710655, |
|
"learning_rate": 0.00018663697104677061, |
|
"loss": 0.1985, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9568627450980393, |
|
"eval_f1": 0.9568627450980393, |
|
"eval_loss": 0.20681221783161163, |
|
"eval_precision": 0.9568627450980393, |
|
"eval_recall": 0.9568627450980393, |
|
"eval_runtime": 17.1131, |
|
"eval_samples_per_second": 59.603, |
|
"eval_steps_per_second": 7.48, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 12.765353202819824, |
|
"learning_rate": 0.00018619153674832964, |
|
"loss": 0.0698, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 9.829974174499512, |
|
"learning_rate": 0.00018574610244988867, |
|
"loss": 0.076, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.024047628045082092, |
|
"learning_rate": 0.00018530066815144767, |
|
"loss": 0.0335, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.1861066818237305, |
|
"learning_rate": 0.0001848552338530067, |
|
"loss": 0.1085, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.6630871295928955, |
|
"learning_rate": 0.00018440979955456572, |
|
"loss": 0.1537, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.012870335020124912, |
|
"learning_rate": 0.00018396436525612472, |
|
"loss": 0.0729, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.9247069954872131, |
|
"learning_rate": 0.00018351893095768375, |
|
"loss": 0.0293, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.0049523478373885155, |
|
"learning_rate": 0.00018307349665924277, |
|
"loss": 0.043, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 6.6150336265563965, |
|
"learning_rate": 0.0001826280623608018, |
|
"loss": 0.0184, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.16694368422031403, |
|
"learning_rate": 0.00018218262806236082, |
|
"loss": 0.0804, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9333333333333333, |
|
"eval_f1": 0.9333333333333333, |
|
"eval_loss": 0.3181270360946655, |
|
"eval_precision": 0.9333333333333333, |
|
"eval_recall": 0.9333333333333333, |
|
"eval_runtime": 16.8894, |
|
"eval_samples_per_second": 60.393, |
|
"eval_steps_per_second": 7.579, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.3894243240356445, |
|
"learning_rate": 0.00018173719376391982, |
|
"loss": 0.0611, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.014911720529198647, |
|
"learning_rate": 0.00018129175946547885, |
|
"loss": 0.0172, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.01411391980946064, |
|
"learning_rate": 0.00018084632516703788, |
|
"loss": 0.2164, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 13.27253532409668, |
|
"learning_rate": 0.00018040089086859688, |
|
"loss": 0.1459, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.028676768764853477, |
|
"learning_rate": 0.0001799554565701559, |
|
"loss": 0.0702, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 4.484796047210693, |
|
"learning_rate": 0.00017951002227171493, |
|
"loss": 0.1319, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 5.17644739151001, |
|
"learning_rate": 0.00017906458797327396, |
|
"loss": 0.1276, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 4.405980110168457, |
|
"learning_rate": 0.00017861915367483298, |
|
"loss": 0.1718, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.1352098286151886, |
|
"learning_rate": 0.000178173719376392, |
|
"loss": 0.1796, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 8.556909561157227, |
|
"learning_rate": 0.000177728285077951, |
|
"loss": 0.1672, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.9274509803921569, |
|
"eval_f1": 0.9274509803921569, |
|
"eval_loss": 0.35819730162620544, |
|
"eval_precision": 0.9274509803921569, |
|
"eval_recall": 0.9274509803921569, |
|
"eval_runtime": 16.8086, |
|
"eval_samples_per_second": 60.683, |
|
"eval_steps_per_second": 7.615, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.4172447919845581, |
|
"learning_rate": 0.00017728285077951003, |
|
"loss": 0.0686, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 3.2105464935302734, |
|
"learning_rate": 0.00017683741648106903, |
|
"loss": 0.0347, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.23049691319465637, |
|
"learning_rate": 0.00017639198218262806, |
|
"loss": 0.1714, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.008479436859488487, |
|
"learning_rate": 0.0001759465478841871, |
|
"loss": 0.0519, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.11945914477109909, |
|
"learning_rate": 0.00017550111358574611, |
|
"loss": 0.0858, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.7262502908706665, |
|
"learning_rate": 0.00017505567928730514, |
|
"loss": 0.0753, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.02038051374256611, |
|
"learning_rate": 0.00017461024498886417, |
|
"loss": 0.0571, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 9.348058700561523, |
|
"learning_rate": 0.00017416481069042317, |
|
"loss": 0.128, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.039238784462213516, |
|
"learning_rate": 0.0001737193763919822, |
|
"loss": 0.0551, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 5.897979736328125, |
|
"learning_rate": 0.00017327394209354122, |
|
"loss": 0.1287, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.9450980392156862, |
|
"eval_f1": 0.9450980392156862, |
|
"eval_loss": 0.27001550793647766, |
|
"eval_precision": 0.9450980392156862, |
|
"eval_recall": 0.9450980392156862, |
|
"eval_runtime": 17.21, |
|
"eval_samples_per_second": 59.268, |
|
"eval_steps_per_second": 7.438, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 5.809938907623291, |
|
"learning_rate": 0.00017282850779510022, |
|
"loss": 0.0894, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.013608383946120739, |
|
"learning_rate": 0.00017238307349665924, |
|
"loss": 0.0312, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.14918509125709534, |
|
"learning_rate": 0.00017193763919821827, |
|
"loss": 0.0813, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.09317727386951447, |
|
"learning_rate": 0.0001714922048997773, |
|
"loss": 0.1021, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.12424630671739578, |
|
"learning_rate": 0.00017104677060133632, |
|
"loss": 0.0382, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.004964092746376991, |
|
"learning_rate": 0.00017060133630289532, |
|
"loss": 0.0729, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 3.553861379623413, |
|
"learning_rate": 0.00017015590200445435, |
|
"loss": 0.0475, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.0814567431807518, |
|
"learning_rate": 0.00016971046770601338, |
|
"loss": 0.0424, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.5184776186943054, |
|
"learning_rate": 0.00016926503340757238, |
|
"loss": 0.0182, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.0049703894183039665, |
|
"learning_rate": 0.0001688195991091314, |
|
"loss": 0.0147, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.9205882352941176, |
|
"eval_f1": 0.9205882352941176, |
|
"eval_loss": 0.369125634431839, |
|
"eval_precision": 0.9205882352941176, |
|
"eval_recall": 0.9205882352941176, |
|
"eval_runtime": 16.9323, |
|
"eval_samples_per_second": 60.24, |
|
"eval_steps_per_second": 7.56, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.021510396152734756, |
|
"learning_rate": 0.00016837416481069043, |
|
"loss": 0.0821, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.06858960539102554, |
|
"learning_rate": 0.00016792873051224946, |
|
"loss": 0.0053, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.00354547961615026, |
|
"learning_rate": 0.00016748329621380848, |
|
"loss": 0.027, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.0027330678422003984, |
|
"learning_rate": 0.0001670378619153675, |
|
"loss": 0.0038, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.0024623360950499773, |
|
"learning_rate": 0.0001665924276169265, |
|
"loss": 0.0435, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.006295201368629932, |
|
"learning_rate": 0.00016614699331848553, |
|
"loss": 0.063, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.8971105813980103, |
|
"learning_rate": 0.00016570155902004456, |
|
"loss": 0.0625, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.004554128274321556, |
|
"learning_rate": 0.00016525612472160356, |
|
"loss": 0.004, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.1439096927642822, |
|
"learning_rate": 0.0001648106904231626, |
|
"loss": 0.0959, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.12275100499391556, |
|
"learning_rate": 0.0001643652561247216, |
|
"loss": 0.0416, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.9470588235294117, |
|
"eval_f1": 0.9470588235294117, |
|
"eval_loss": 0.25350436568260193, |
|
"eval_precision": 0.9470588235294117, |
|
"eval_recall": 0.9470588235294117, |
|
"eval_runtime": 16.7158, |
|
"eval_samples_per_second": 61.02, |
|
"eval_steps_per_second": 7.657, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.0064841569401323795, |
|
"learning_rate": 0.00016391982182628064, |
|
"loss": 0.0479, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.006001554429531097, |
|
"learning_rate": 0.00016347438752783967, |
|
"loss": 0.0624, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.1525709182024002, |
|
"learning_rate": 0.00016302895322939867, |
|
"loss": 0.0855, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.017199277877807617, |
|
"learning_rate": 0.0001625835189309577, |
|
"loss": 0.0199, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.40461423993110657, |
|
"learning_rate": 0.00016213808463251672, |
|
"loss": 0.0613, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.006222237832844257, |
|
"learning_rate": 0.00016169265033407572, |
|
"loss": 0.0168, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.0056493207812309265, |
|
"learning_rate": 0.00016124721603563474, |
|
"loss": 0.0247, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.004911198280751705, |
|
"learning_rate": 0.00016080178173719377, |
|
"loss": 0.074, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.0023081935942173004, |
|
"learning_rate": 0.0001603563474387528, |
|
"loss": 0.1029, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.010029925964772701, |
|
"learning_rate": 0.00015991091314031182, |
|
"loss": 0.0211, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9470588235294117, |
|
"eval_f1": 0.9470588235294117, |
|
"eval_loss": 0.25747954845428467, |
|
"eval_precision": 0.9470588235294117, |
|
"eval_recall": 0.9470588235294117, |
|
"eval_runtime": 16.8113, |
|
"eval_samples_per_second": 60.674, |
|
"eval_steps_per_second": 7.614, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.003727799979969859, |
|
"learning_rate": 0.00015946547884187085, |
|
"loss": 0.0126, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.7938788533210754, |
|
"learning_rate": 0.00015902004454342985, |
|
"loss": 0.0702, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.48882561922073364, |
|
"learning_rate": 0.00015857461024498888, |
|
"loss": 0.0513, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 10.071187973022461, |
|
"learning_rate": 0.0001581291759465479, |
|
"loss": 0.109, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.009675947949290276, |
|
"learning_rate": 0.0001576837416481069, |
|
"loss": 0.0034, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.004574100486934185, |
|
"learning_rate": 0.00015723830734966593, |
|
"loss": 0.0018, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.0033382533583790064, |
|
"learning_rate": 0.00015679287305122495, |
|
"loss": 0.0719, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 4.464893341064453, |
|
"learning_rate": 0.00015634743875278398, |
|
"loss": 0.0099, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.004051242955029011, |
|
"learning_rate": 0.000155902004454343, |
|
"loss": 0.0846, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.047729793936014175, |
|
"learning_rate": 0.000155456570155902, |
|
"loss": 0.088, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.9529411764705882, |
|
"eval_f1": 0.9529411764705882, |
|
"eval_loss": 0.19075074791908264, |
|
"eval_precision": 0.9529411764705882, |
|
"eval_recall": 0.9529411764705882, |
|
"eval_runtime": 16.8471, |
|
"eval_samples_per_second": 60.544, |
|
"eval_steps_per_second": 7.598, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.008470825850963593, |
|
"learning_rate": 0.00015501113585746103, |
|
"loss": 0.0022, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 2.529259204864502, |
|
"learning_rate": 0.00015456570155902006, |
|
"loss": 0.0917, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.3583894670009613, |
|
"learning_rate": 0.00015412026726057906, |
|
"loss": 0.0793, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.039421286433935165, |
|
"learning_rate": 0.00015367483296213809, |
|
"loss": 0.0742, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.2551974654197693, |
|
"learning_rate": 0.0001532293986636971, |
|
"loss": 0.0029, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.3370533883571625, |
|
"learning_rate": 0.00015278396436525614, |
|
"loss": 0.0563, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 1.642697811126709, |
|
"learning_rate": 0.00015233853006681517, |
|
"loss": 0.0362, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 12.125362396240234, |
|
"learning_rate": 0.0001518930957683742, |
|
"loss": 0.0949, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.0022143302485346794, |
|
"learning_rate": 0.0001514476614699332, |
|
"loss": 0.0219, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.0026613217778503895, |
|
"learning_rate": 0.00015100222717149222, |
|
"loss": 0.1849, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.9529411764705882, |
|
"eval_f1": 0.9529411764705882, |
|
"eval_loss": 0.2200697511434555, |
|
"eval_precision": 0.9529411764705882, |
|
"eval_recall": 0.9529411764705882, |
|
"eval_runtime": 16.9063, |
|
"eval_samples_per_second": 60.332, |
|
"eval_steps_per_second": 7.571, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.5232903957366943, |
|
"learning_rate": 0.00015055679287305122, |
|
"loss": 0.0013, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.5729717016220093, |
|
"learning_rate": 0.00015011135857461024, |
|
"loss": 0.0433, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.005640827585011721, |
|
"learning_rate": 0.00014966592427616927, |
|
"loss": 0.0007, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 7.901826858520508, |
|
"learning_rate": 0.0001492204899777283, |
|
"loss": 0.1131, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.4438769221305847, |
|
"learning_rate": 0.00014877505567928732, |
|
"loss": 0.0557, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 0.0032986474689096212, |
|
"learning_rate": 0.00014832962138084635, |
|
"loss": 0.001, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.09250050783157349, |
|
"learning_rate": 0.00014788418708240535, |
|
"loss": 0.02, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.005981163587421179, |
|
"learning_rate": 0.00014743875278396438, |
|
"loss": 0.0147, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.02270863950252533, |
|
"learning_rate": 0.0001469933184855234, |
|
"loss": 0.0246, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.014892498031258583, |
|
"learning_rate": 0.0001465478841870824, |
|
"loss": 0.0009, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.9549019607843138, |
|
"eval_f1": 0.9549019607843138, |
|
"eval_loss": 0.22289611399173737, |
|
"eval_precision": 0.9549019607843138, |
|
"eval_recall": 0.9549019607843138, |
|
"eval_runtime": 16.9051, |
|
"eval_samples_per_second": 60.337, |
|
"eval_steps_per_second": 7.572, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 10.428900718688965, |
|
"learning_rate": 0.00014610244988864143, |
|
"loss": 0.0115, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 0.0370117723941803, |
|
"learning_rate": 0.00014565701559020045, |
|
"loss": 0.0525, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.0029497628565877676, |
|
"learning_rate": 0.00014521158129175948, |
|
"loss": 0.0009, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.8202322125434875, |
|
"learning_rate": 0.0001447661469933185, |
|
"loss": 0.051, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 0.012352533638477325, |
|
"learning_rate": 0.0001443207126948775, |
|
"loss": 0.0034, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.5958288908004761, |
|
"learning_rate": 0.00014387527839643653, |
|
"loss": 0.0016, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.01864822395145893, |
|
"learning_rate": 0.00014342984409799556, |
|
"loss": 0.0113, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.053810037672519684, |
|
"learning_rate": 0.00014298440979955456, |
|
"loss": 0.0044, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.07594209164381027, |
|
"learning_rate": 0.00014253897550111359, |
|
"loss": 0.0022, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.003192998468875885, |
|
"learning_rate": 0.0001420935412026726, |
|
"loss": 0.0599, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.9607843137254902, |
|
"eval_f1": 0.9607843137254902, |
|
"eval_loss": 0.1780730038881302, |
|
"eval_precision": 0.9607843137254902, |
|
"eval_recall": 0.9607843137254902, |
|
"eval_runtime": 16.8581, |
|
"eval_samples_per_second": 60.505, |
|
"eval_steps_per_second": 7.593, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.0019150603329762816, |
|
"learning_rate": 0.00014164810690423164, |
|
"loss": 0.0136, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.0017021102830767632, |
|
"learning_rate": 0.00014120267260579067, |
|
"loss": 0.0004, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.002855106256902218, |
|
"learning_rate": 0.0001407572383073497, |
|
"loss": 0.0028, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.0011817626655101776, |
|
"learning_rate": 0.0001403118040089087, |
|
"loss": 0.0146, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 11.652885437011719, |
|
"learning_rate": 0.00013986636971046772, |
|
"loss": 0.0155, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.002146989107131958, |
|
"learning_rate": 0.00013942093541202674, |
|
"loss": 0.007, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0012873058440163732, |
|
"learning_rate": 0.00013897550111358574, |
|
"loss": 0.0005, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.001976664876565337, |
|
"learning_rate": 0.00013853006681514477, |
|
"loss": 0.0067, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.004611461888998747, |
|
"learning_rate": 0.00013808463251670377, |
|
"loss": 0.068, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.05266120657324791, |
|
"learning_rate": 0.00013763919821826282, |
|
"loss": 0.0004, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.9666666666666667, |
|
"eval_f1": 0.9666666666666667, |
|
"eval_loss": 0.1750936210155487, |
|
"eval_precision": 0.9666666666666667, |
|
"eval_recall": 0.9666666666666667, |
|
"eval_runtime": 16.8283, |
|
"eval_samples_per_second": 60.612, |
|
"eval_steps_per_second": 7.606, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.005359290167689323, |
|
"learning_rate": 0.00013719376391982185, |
|
"loss": 0.038, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 0.0019557911437004805, |
|
"learning_rate": 0.00013674832962138085, |
|
"loss": 0.0006, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 0.002684570848941803, |
|
"learning_rate": 0.00013630289532293988, |
|
"loss": 0.0179, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 2.459765911102295, |
|
"learning_rate": 0.0001358574610244989, |
|
"loss": 0.0164, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"grad_norm": 0.010600595735013485, |
|
"learning_rate": 0.0001354120267260579, |
|
"loss": 0.0008, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 0.12665680050849915, |
|
"learning_rate": 0.00013496659242761693, |
|
"loss": 0.1161, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 0.0013257049722597003, |
|
"learning_rate": 0.00013452115812917595, |
|
"loss": 0.0718, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 2.888887882232666, |
|
"learning_rate": 0.00013407572383073498, |
|
"loss": 0.0028, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 0.0022232867777347565, |
|
"learning_rate": 0.000133630289532294, |
|
"loss": 0.0009, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"grad_norm": 0.0026971769984811544, |
|
"learning_rate": 0.00013318485523385303, |
|
"loss": 0.0004, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_accuracy": 0.9686274509803922, |
|
"eval_f1": 0.9686274509803922, |
|
"eval_loss": 0.1684277504682541, |
|
"eval_precision": 0.9686274509803922, |
|
"eval_recall": 0.9686274509803922, |
|
"eval_runtime": 16.8277, |
|
"eval_samples_per_second": 60.614, |
|
"eval_steps_per_second": 7.606, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"grad_norm": 0.06871479004621506, |
|
"learning_rate": 0.00013273942093541203, |
|
"loss": 0.0004, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"grad_norm": 0.010680126026272774, |
|
"learning_rate": 0.00013229398663697106, |
|
"loss": 0.0623, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"grad_norm": 0.0024642229545861483, |
|
"learning_rate": 0.00013184855233853006, |
|
"loss": 0.0005, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 0.0014489213936030865, |
|
"learning_rate": 0.00013140311804008909, |
|
"loss": 0.0004, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.004346741829067469, |
|
"learning_rate": 0.0001309576837416481, |
|
"loss": 0.0025, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 0.006028163246810436, |
|
"learning_rate": 0.0001305122494432071, |
|
"loss": 0.0546, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 0.0034053712151944637, |
|
"learning_rate": 0.00013006681514476616, |
|
"loss": 0.028, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 0.004035326186567545, |
|
"learning_rate": 0.0001296213808463252, |
|
"loss": 0.0042, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"grad_norm": 0.0025597705971449614, |
|
"learning_rate": 0.0001291759465478842, |
|
"loss": 0.0201, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.0010411799885332584, |
|
"learning_rate": 0.00012873051224944322, |
|
"loss": 0.0352, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_accuracy": 0.9754901960784313, |
|
"eval_f1": 0.9754901960784313, |
|
"eval_loss": 0.15021921694278717, |
|
"eval_precision": 0.9754901960784313, |
|
"eval_recall": 0.9754901960784313, |
|
"eval_runtime": 16.7579, |
|
"eval_samples_per_second": 60.867, |
|
"eval_steps_per_second": 7.638, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"grad_norm": 0.04698515310883522, |
|
"learning_rate": 0.00012828507795100224, |
|
"loss": 0.0036, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 0.013074472546577454, |
|
"learning_rate": 0.00012783964365256124, |
|
"loss": 0.0626, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"grad_norm": 0.002302026841789484, |
|
"learning_rate": 0.00012739420935412027, |
|
"loss": 0.0004, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.012451753951609135, |
|
"learning_rate": 0.0001269487750556793, |
|
"loss": 0.0418, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 0.010159431956708431, |
|
"learning_rate": 0.00012650334075723832, |
|
"loss": 0.0008, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.20833130180835724, |
|
"learning_rate": 0.00012605790645879735, |
|
"loss": 0.0006, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"grad_norm": 0.0008345023961737752, |
|
"learning_rate": 0.00012561247216035635, |
|
"loss": 0.0013, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.0027376762591302395, |
|
"learning_rate": 0.00012516703786191537, |
|
"loss": 0.0003, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.0012037215055897832, |
|
"learning_rate": 0.0001247216035634744, |
|
"loss": 0.0003, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"grad_norm": 0.02287732996046543, |
|
"learning_rate": 0.0001242761692650334, |
|
"loss": 0.0003, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.9745098039215686, |
|
"eval_f1": 0.9745098039215686, |
|
"eval_loss": 0.15970657765865326, |
|
"eval_precision": 0.9745098039215686, |
|
"eval_recall": 0.9745098039215686, |
|
"eval_runtime": 16.8382, |
|
"eval_samples_per_second": 60.576, |
|
"eval_steps_per_second": 7.602, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 0.0013968138955533504, |
|
"learning_rate": 0.00012383073496659243, |
|
"loss": 0.0005, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 0.003733535995706916, |
|
"learning_rate": 0.00012338530066815145, |
|
"loss": 0.0003, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 0.976151168346405, |
|
"learning_rate": 0.00012293986636971045, |
|
"loss": 0.001, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"grad_norm": 0.003409826662391424, |
|
"learning_rate": 0.0001224944320712695, |
|
"loss": 0.0003, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.004897921811789274, |
|
"learning_rate": 0.00012204899777282852, |
|
"loss": 0.0013, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.0010848743841052055, |
|
"learning_rate": 0.00012160356347438753, |
|
"loss": 0.0003, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"grad_norm": 0.0013221738627180457, |
|
"learning_rate": 0.00012115812917594656, |
|
"loss": 0.0002, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 0.09072667360305786, |
|
"learning_rate": 0.00012071269487750559, |
|
"loss": 0.0029, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"grad_norm": 0.00137105374597013, |
|
"learning_rate": 0.00012026726057906458, |
|
"loss": 0.0007, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.004980257712304592, |
|
"learning_rate": 0.00011982182628062361, |
|
"loss": 0.0003, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_accuracy": 0.9558823529411765, |
|
"eval_f1": 0.9558823529411765, |
|
"eval_loss": 0.25734347105026245, |
|
"eval_precision": 0.9558823529411765, |
|
"eval_recall": 0.9558823529411765, |
|
"eval_runtime": 16.754, |
|
"eval_samples_per_second": 60.881, |
|
"eval_steps_per_second": 7.64, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0013974602334201336, |
|
"learning_rate": 0.00011937639198218265, |
|
"loss": 0.0015, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.0027338312938809395, |
|
"learning_rate": 0.00011893095768374165, |
|
"loss": 0.0028, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.0018153024138882756, |
|
"learning_rate": 0.00011848552338530068, |
|
"loss": 0.0003, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.0008053297642618418, |
|
"learning_rate": 0.00011804008908685969, |
|
"loss": 0.0145, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 0.0016097394982352853, |
|
"learning_rate": 0.00011759465478841872, |
|
"loss": 0.0003, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 0.0011555146193131804, |
|
"learning_rate": 0.00011714922048997774, |
|
"loss": 0.0007, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 0.0013847779482603073, |
|
"learning_rate": 0.00011670378619153674, |
|
"loss": 0.0005, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"grad_norm": 0.026971347630023956, |
|
"learning_rate": 0.00011625835189309577, |
|
"loss": 0.04, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 0.004224107600748539, |
|
"learning_rate": 0.0001158129175946548, |
|
"loss": 0.0004, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"grad_norm": 0.008265385404229164, |
|
"learning_rate": 0.00011536748329621381, |
|
"loss": 0.0005, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_accuracy": 0.9666666666666667, |
|
"eval_f1": 0.9666666666666667, |
|
"eval_loss": 0.19066497683525085, |
|
"eval_precision": 0.9666666666666667, |
|
"eval_recall": 0.9666666666666667, |
|
"eval_runtime": 16.876, |
|
"eval_samples_per_second": 60.441, |
|
"eval_steps_per_second": 7.585, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 0.0009556623990647495, |
|
"learning_rate": 0.00011492204899777283, |
|
"loss": 0.0103, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"grad_norm": 0.0008651684038341045, |
|
"learning_rate": 0.00011447661469933186, |
|
"loss": 0.0003, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 0.0021915507968515158, |
|
"learning_rate": 0.00011403118040089087, |
|
"loss": 0.0017, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 0.993601381778717, |
|
"learning_rate": 0.0001135857461024499, |
|
"loss": 0.0016, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"grad_norm": 0.012279433198273182, |
|
"learning_rate": 0.00011314031180400893, |
|
"loss": 0.0004, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"grad_norm": 0.0192144475877285, |
|
"learning_rate": 0.00011269487750556793, |
|
"loss": 0.0211, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 0.002850558841601014, |
|
"learning_rate": 0.00011224944320712695, |
|
"loss": 0.0011, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"grad_norm": 0.016757028177380562, |
|
"learning_rate": 0.00011180400890868597, |
|
"loss": 0.0004, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"grad_norm": 0.009729539044201374, |
|
"learning_rate": 0.00011135857461024499, |
|
"loss": 0.0005, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 0.0012516066199168563, |
|
"learning_rate": 0.00011091314031180402, |
|
"loss": 0.0741, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_accuracy": 0.9637254901960784, |
|
"eval_f1": 0.9637254901960784, |
|
"eval_loss": 0.20377103984355927, |
|
"eval_precision": 0.9637254901960784, |
|
"eval_recall": 0.9637254901960784, |
|
"eval_runtime": 16.9265, |
|
"eval_samples_per_second": 60.26, |
|
"eval_steps_per_second": 7.562, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.03771669417619705, |
|
"learning_rate": 0.00011046770601336303, |
|
"loss": 0.0005, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 0.0029582425486296415, |
|
"learning_rate": 0.00011002227171492206, |
|
"loss": 0.044, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"grad_norm": 0.0017603106098249555, |
|
"learning_rate": 0.00010957683741648108, |
|
"loss": 0.0002, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"grad_norm": 0.006093455944210291, |
|
"learning_rate": 0.00010913140311804008, |
|
"loss": 0.0004, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.024271611124277115, |
|
"learning_rate": 0.00010868596881959911, |
|
"loss": 0.0004, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"grad_norm": 0.0059431749396026134, |
|
"learning_rate": 0.00010824053452115814, |
|
"loss": 0.0171, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"grad_norm": 0.001350950333289802, |
|
"learning_rate": 0.00010779510022271715, |
|
"loss": 0.0169, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 0.11208397895097733, |
|
"learning_rate": 0.00010734966592427618, |
|
"loss": 0.0009, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 0.000902643718291074, |
|
"learning_rate": 0.0001069042316258352, |
|
"loss": 0.0002, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 0.0008040807442739606, |
|
"learning_rate": 0.00010645879732739422, |
|
"loss": 0.0025, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_accuracy": 0.9647058823529412, |
|
"eval_f1": 0.9647058823529412, |
|
"eval_loss": 0.1929028332233429, |
|
"eval_precision": 0.9647058823529412, |
|
"eval_recall": 0.9647058823529412, |
|
"eval_runtime": 17.2457, |
|
"eval_samples_per_second": 59.145, |
|
"eval_steps_per_second": 7.422, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 0.001127121620811522, |
|
"learning_rate": 0.00010601336302895324, |
|
"loss": 0.0025, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.0010411780094727874, |
|
"learning_rate": 0.00010556792873051224, |
|
"loss": 0.0002, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"grad_norm": 0.0012262547388672829, |
|
"learning_rate": 0.00010512249443207127, |
|
"loss": 0.0031, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"grad_norm": 0.06668848544359207, |
|
"learning_rate": 0.0001046770601336303, |
|
"loss": 0.0011, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"grad_norm": 11.083710670471191, |
|
"learning_rate": 0.00010423162583518931, |
|
"loss": 0.1101, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"grad_norm": 0.0041348133236169815, |
|
"learning_rate": 0.00010378619153674833, |
|
"loss": 0.0312, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"grad_norm": 0.0014749247347936034, |
|
"learning_rate": 0.00010334075723830736, |
|
"loss": 0.002, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.00816721748560667, |
|
"learning_rate": 0.00010289532293986637, |
|
"loss": 0.0374, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 0.0029881075024604797, |
|
"learning_rate": 0.0001024498886414254, |
|
"loss": 0.0245, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.01441921480000019, |
|
"learning_rate": 0.00010200445434298443, |
|
"loss": 0.0293, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.9607843137254902, |
|
"eval_f1": 0.9607843137254902, |
|
"eval_loss": 0.17395375669002533, |
|
"eval_precision": 0.9607843137254902, |
|
"eval_recall": 0.9607843137254902, |
|
"eval_runtime": 17.1376, |
|
"eval_samples_per_second": 59.518, |
|
"eval_steps_per_second": 7.469, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"grad_norm": 0.005346087273210287, |
|
"learning_rate": 0.00010155902004454343, |
|
"loss": 0.054, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"grad_norm": 0.0087255435064435, |
|
"learning_rate": 0.00010111358574610245, |
|
"loss": 0.0008, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"grad_norm": 0.0033257934264838696, |
|
"learning_rate": 0.00010066815144766148, |
|
"loss": 0.0034, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"grad_norm": 0.0017741642659530044, |
|
"learning_rate": 0.00010022271714922049, |
|
"loss": 0.0008, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.03513794392347336, |
|
"learning_rate": 9.977728285077952e-05, |
|
"loss": 0.0064, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0020874643232673407, |
|
"learning_rate": 9.933184855233853e-05, |
|
"loss": 0.0003, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0035891502629965544, |
|
"learning_rate": 9.888641425389756e-05, |
|
"loss": 0.0489, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.001030069775879383, |
|
"learning_rate": 9.844097995545658e-05, |
|
"loss": 0.0011, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.053950581699609756, |
|
"learning_rate": 9.79955456570156e-05, |
|
"loss": 0.0016, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"grad_norm": 0.0023863562382757664, |
|
"learning_rate": 9.755011135857461e-05, |
|
"loss": 0.0003, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_accuracy": 0.9568627450980393, |
|
"eval_f1": 0.9568627450980393, |
|
"eval_loss": 0.25984036922454834, |
|
"eval_precision": 0.9568627450980393, |
|
"eval_recall": 0.9568627450980393, |
|
"eval_runtime": 17.3202, |
|
"eval_samples_per_second": 58.891, |
|
"eval_steps_per_second": 7.39, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"grad_norm": 0.054378170520067215, |
|
"learning_rate": 9.710467706013364e-05, |
|
"loss": 0.0005, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"grad_norm": 0.003204792272299528, |
|
"learning_rate": 9.665924276169266e-05, |
|
"loss": 0.0485, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"grad_norm": 0.001054179621860385, |
|
"learning_rate": 9.621380846325168e-05, |
|
"loss": 0.0456, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"grad_norm": 0.0008275459986180067, |
|
"learning_rate": 9.576837416481069e-05, |
|
"loss": 0.0003, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"grad_norm": 0.06051745265722275, |
|
"learning_rate": 9.532293986636972e-05, |
|
"loss": 0.0005, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"grad_norm": 0.0037743300199508667, |
|
"learning_rate": 9.487750556792874e-05, |
|
"loss": 0.0034, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"grad_norm": 0.00043078724411316216, |
|
"learning_rate": 9.443207126948775e-05, |
|
"loss": 0.0004, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 0.001160395797342062, |
|
"learning_rate": 9.398663697104677e-05, |
|
"loss": 0.0009, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"grad_norm": 0.0006127849337644875, |
|
"learning_rate": 9.35412026726058e-05, |
|
"loss": 0.0001, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"grad_norm": 0.0023824446834623814, |
|
"learning_rate": 9.309576837416482e-05, |
|
"loss": 0.0037, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.961764705882353, |
|
"eval_f1": 0.961764705882353, |
|
"eval_loss": 0.17718201875686646, |
|
"eval_precision": 0.961764705882353, |
|
"eval_recall": 0.961764705882353, |
|
"eval_runtime": 17.3617, |
|
"eval_samples_per_second": 58.75, |
|
"eval_steps_per_second": 7.373, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"grad_norm": 0.0015472627710551023, |
|
"learning_rate": 9.265033407572383e-05, |
|
"loss": 0.0132, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"grad_norm": 0.002203689655289054, |
|
"learning_rate": 9.220489977728286e-05, |
|
"loss": 0.0003, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"grad_norm": 0.003569718450307846, |
|
"learning_rate": 9.175946547884187e-05, |
|
"loss": 0.0006, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"grad_norm": 0.0020932252518832684, |
|
"learning_rate": 9.13140311804009e-05, |
|
"loss": 0.0051, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"grad_norm": 0.0030166106298565865, |
|
"learning_rate": 9.086859688195991e-05, |
|
"loss": 0.0008, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"grad_norm": 0.001667293719947338, |
|
"learning_rate": 9.042316258351894e-05, |
|
"loss": 0.0004, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 0.0005573901580646634, |
|
"learning_rate": 8.997772828507795e-05, |
|
"loss": 0.0097, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"grad_norm": 0.0013365427730605006, |
|
"learning_rate": 8.953229398663698e-05, |
|
"loss": 0.0002, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"grad_norm": 0.0031586128752678633, |
|
"learning_rate": 8.9086859688196e-05, |
|
"loss": 0.0022, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"grad_norm": 0.21919859945774078, |
|
"learning_rate": 8.864142538975502e-05, |
|
"loss": 0.0213, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_accuracy": 0.9519607843137254, |
|
"eval_f1": 0.9519607843137254, |
|
"eval_loss": 0.2910812199115753, |
|
"eval_precision": 0.9519607843137254, |
|
"eval_recall": 0.9519607843137254, |
|
"eval_runtime": 17.3785, |
|
"eval_samples_per_second": 58.693, |
|
"eval_steps_per_second": 7.365, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"grad_norm": 0.002107376931235194, |
|
"learning_rate": 8.819599109131403e-05, |
|
"loss": 0.001, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"grad_norm": 11.032261848449707, |
|
"learning_rate": 8.775055679287306e-05, |
|
"loss": 0.0331, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"grad_norm": 0.0010789623484015465, |
|
"learning_rate": 8.730512249443208e-05, |
|
"loss": 0.0002, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"grad_norm": 0.0013054576702415943, |
|
"learning_rate": 8.68596881959911e-05, |
|
"loss": 0.0019, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"grad_norm": 0.001707877148874104, |
|
"learning_rate": 8.641425389755011e-05, |
|
"loss": 0.0004, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"grad_norm": 0.0006996811716817319, |
|
"learning_rate": 8.596881959910914e-05, |
|
"loss": 0.0172, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"grad_norm": 0.00302655971609056, |
|
"learning_rate": 8.552338530066816e-05, |
|
"loss": 0.0461, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 0.0005682900664396584, |
|
"learning_rate": 8.507795100222718e-05, |
|
"loss": 0.0002, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"grad_norm": 0.0009555260185152292, |
|
"learning_rate": 8.463251670378619e-05, |
|
"loss": 0.0002, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"grad_norm": 0.020968729630112648, |
|
"learning_rate": 8.418708240534521e-05, |
|
"loss": 0.027, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_accuracy": 0.9519607843137254, |
|
"eval_f1": 0.9519607843137254, |
|
"eval_loss": 0.25403299927711487, |
|
"eval_precision": 0.9519607843137254, |
|
"eval_recall": 0.9519607843137254, |
|
"eval_runtime": 17.2499, |
|
"eval_samples_per_second": 59.131, |
|
"eval_steps_per_second": 7.42, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"grad_norm": 0.10158411413431168, |
|
"learning_rate": 8.374164810690424e-05, |
|
"loss": 0.0003, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 0.016370078548789024, |
|
"learning_rate": 8.329621380846325e-05, |
|
"loss": 0.0981, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"grad_norm": 0.0012518821749836206, |
|
"learning_rate": 8.285077951002228e-05, |
|
"loss": 0.0004, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"grad_norm": 0.0029411010909825563, |
|
"learning_rate": 8.24053452115813e-05, |
|
"loss": 0.0007, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"grad_norm": 0.000937216158490628, |
|
"learning_rate": 8.195991091314032e-05, |
|
"loss": 0.0177, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"grad_norm": 0.0034318570978939533, |
|
"learning_rate": 8.151447661469933e-05, |
|
"loss": 0.0312, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"grad_norm": 0.0007262133876793087, |
|
"learning_rate": 8.106904231625836e-05, |
|
"loss": 0.0002, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"grad_norm": 0.0007804339984431863, |
|
"learning_rate": 8.062360801781737e-05, |
|
"loss": 0.0002, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"grad_norm": 0.001342720352113247, |
|
"learning_rate": 8.01781737193764e-05, |
|
"loss": 0.0002, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.001933095627464354, |
|
"learning_rate": 7.973273942093543e-05, |
|
"loss": 0.0155, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_accuracy": 0.9549019607843138, |
|
"eval_f1": 0.9549019607843138, |
|
"eval_loss": 0.22524712979793549, |
|
"eval_precision": 0.9549019607843138, |
|
"eval_recall": 0.9549019607843138, |
|
"eval_runtime": 17.4105, |
|
"eval_samples_per_second": 58.585, |
|
"eval_steps_per_second": 7.352, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0011171975638717413, |
|
"learning_rate": 7.928730512249444e-05, |
|
"loss": 0.0002, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.001169139752164483, |
|
"learning_rate": 7.884187082405345e-05, |
|
"loss": 0.0002, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.002055455232039094, |
|
"learning_rate": 7.839643652561248e-05, |
|
"loss": 0.0082, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.05008271709084511, |
|
"learning_rate": 7.79510022271715e-05, |
|
"loss": 0.0321, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.000607622554525733, |
|
"learning_rate": 7.750556792873052e-05, |
|
"loss": 0.0397, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"grad_norm": 0.000795868574641645, |
|
"learning_rate": 7.706013363028953e-05, |
|
"loss": 0.0064, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"grad_norm": 0.010306187905371189, |
|
"learning_rate": 7.661469933184856e-05, |
|
"loss": 0.004, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"grad_norm": 0.001497789635322988, |
|
"learning_rate": 7.616926503340758e-05, |
|
"loss": 0.005, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 0.12043255567550659, |
|
"learning_rate": 7.57238307349666e-05, |
|
"loss": 0.003, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"grad_norm": 0.0034782905131578445, |
|
"learning_rate": 7.527839643652561e-05, |
|
"loss": 0.0002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"eval_accuracy": 0.9431372549019608, |
|
"eval_f1": 0.9431372549019608, |
|
"eval_loss": 0.3040062189102173, |
|
"eval_precision": 0.9431372549019608, |
|
"eval_recall": 0.9431372549019608, |
|
"eval_runtime": 17.1818, |
|
"eval_samples_per_second": 59.365, |
|
"eval_steps_per_second": 7.45, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"grad_norm": 5.399389266967773, |
|
"learning_rate": 7.483296213808464e-05, |
|
"loss": 0.0092, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"grad_norm": 0.0007157580694183707, |
|
"learning_rate": 7.438752783964366e-05, |
|
"loss": 0.0134, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"grad_norm": 0.005470567848533392, |
|
"learning_rate": 7.394209354120267e-05, |
|
"loss": 0.147, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"grad_norm": 0.01675906591117382, |
|
"learning_rate": 7.34966592427617e-05, |
|
"loss": 0.0813, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"grad_norm": 5.728336334228516, |
|
"learning_rate": 7.305122494432071e-05, |
|
"loss": 0.0275, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"grad_norm": 0.003522884799167514, |
|
"learning_rate": 7.260579064587974e-05, |
|
"loss": 0.0109, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"grad_norm": 0.026970118284225464, |
|
"learning_rate": 7.216035634743875e-05, |
|
"loss": 0.0004, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"grad_norm": 0.12984509766101837, |
|
"learning_rate": 7.171492204899778e-05, |
|
"loss": 0.0007, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"grad_norm": 0.009168056771159172, |
|
"learning_rate": 7.126948775055679e-05, |
|
"loss": 0.0006, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"grad_norm": 0.0009597400785423815, |
|
"learning_rate": 7.082405345211582e-05, |
|
"loss": 0.011, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_accuracy": 0.9598039215686275, |
|
"eval_f1": 0.9598039215686275, |
|
"eval_loss": 0.1923176795244217, |
|
"eval_precision": 0.9598039215686275, |
|
"eval_recall": 0.9598039215686275, |
|
"eval_runtime": 17.225, |
|
"eval_samples_per_second": 59.216, |
|
"eval_steps_per_second": 7.431, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 0.0009739417000673711, |
|
"learning_rate": 7.037861915367485e-05, |
|
"loss": 0.0014, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"grad_norm": 0.0022935476154088974, |
|
"learning_rate": 6.993318485523386e-05, |
|
"loss": 0.0133, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"grad_norm": 0.0005638069123961031, |
|
"learning_rate": 6.948775055679287e-05, |
|
"loss": 0.0009, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"grad_norm": 0.014625852927565575, |
|
"learning_rate": 6.904231625835188e-05, |
|
"loss": 0.0006, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"grad_norm": 0.001474756863899529, |
|
"learning_rate": 6.859688195991092e-05, |
|
"loss": 0.0128, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"grad_norm": 0.0029620621353387833, |
|
"learning_rate": 6.815144766146994e-05, |
|
"loss": 0.0002, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"grad_norm": 0.0016939816996455193, |
|
"learning_rate": 6.770601336302895e-05, |
|
"loss": 0.0003, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"grad_norm": 0.0009252108866348863, |
|
"learning_rate": 6.726057906458798e-05, |
|
"loss": 0.0011, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"grad_norm": 0.1285027265548706, |
|
"learning_rate": 6.6815144766147e-05, |
|
"loss": 0.0071, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"grad_norm": 0.00125114805996418, |
|
"learning_rate": 6.636971046770602e-05, |
|
"loss": 0.0006, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"eval_accuracy": 0.9637254901960784, |
|
"eval_f1": 0.9637254901960784, |
|
"eval_loss": 0.20890936255455017, |
|
"eval_precision": 0.9637254901960784, |
|
"eval_recall": 0.9637254901960784, |
|
"eval_runtime": 17.1679, |
|
"eval_samples_per_second": 59.413, |
|
"eval_steps_per_second": 7.456, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"grad_norm": 0.0017677777213975787, |
|
"learning_rate": 6.592427616926503e-05, |
|
"loss": 0.03, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"grad_norm": 0.0006067939684726298, |
|
"learning_rate": 6.547884187082406e-05, |
|
"loss": 0.0003, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 0.0010102881351485848, |
|
"learning_rate": 6.503340757238308e-05, |
|
"loss": 0.0002, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"grad_norm": 0.0009144017240032554, |
|
"learning_rate": 6.45879732739421e-05, |
|
"loss": 0.0282, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"grad_norm": 0.0007274287054315209, |
|
"learning_rate": 6.414253897550112e-05, |
|
"loss": 0.0002, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"grad_norm": 0.0034935837611556053, |
|
"learning_rate": 6.369710467706013e-05, |
|
"loss": 0.0046, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"grad_norm": 0.004235483705997467, |
|
"learning_rate": 6.325167037861916e-05, |
|
"loss": 0.0002, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"grad_norm": 0.0013753636740148067, |
|
"learning_rate": 6.280623608017817e-05, |
|
"loss": 0.0101, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"grad_norm": 0.0008035491337068379, |
|
"learning_rate": 6.23608017817372e-05, |
|
"loss": 0.0002, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 0.00173095241189003, |
|
"learning_rate": 6.191536748329621e-05, |
|
"loss": 0.0002, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_accuracy": 0.957843137254902, |
|
"eval_f1": 0.957843137254902, |
|
"eval_loss": 0.22062458097934723, |
|
"eval_precision": 0.957843137254902, |
|
"eval_recall": 0.957843137254902, |
|
"eval_runtime": 17.3063, |
|
"eval_samples_per_second": 58.938, |
|
"eval_steps_per_second": 7.396, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"grad_norm": 0.001496517681516707, |
|
"learning_rate": 6.146993318485523e-05, |
|
"loss": 0.0002, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"grad_norm": 0.0005152082885615528, |
|
"learning_rate": 6.102449888641426e-05, |
|
"loss": 0.0002, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"grad_norm": 0.0005918457172811031, |
|
"learning_rate": 6.057906458797328e-05, |
|
"loss": 0.0192, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"grad_norm": 0.000515251827891916, |
|
"learning_rate": 6.013363028953229e-05, |
|
"loss": 0.0003, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.00043858023127540946, |
|
"learning_rate": 5.9688195991091325e-05, |
|
"loss": 0.0105, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0018106413772329688, |
|
"learning_rate": 5.924276169265034e-05, |
|
"loss": 0.0059, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.000563229201361537, |
|
"learning_rate": 5.879732739420936e-05, |
|
"loss": 0.0003, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.001515958341769874, |
|
"learning_rate": 5.835189309576837e-05, |
|
"loss": 0.0058, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0005047390004619956, |
|
"learning_rate": 5.79064587973274e-05, |
|
"loss": 0.0002, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"grad_norm": 0.03174121677875519, |
|
"learning_rate": 5.746102449888642e-05, |
|
"loss": 0.0006, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"eval_accuracy": 0.9627450980392157, |
|
"eval_f1": 0.9627450980392157, |
|
"eval_loss": 0.22668223083019257, |
|
"eval_precision": 0.9627450980392157, |
|
"eval_recall": 0.9627450980392157, |
|
"eval_runtime": 16.905, |
|
"eval_samples_per_second": 60.337, |
|
"eval_steps_per_second": 7.572, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"grad_norm": 0.0019246222218498588, |
|
"learning_rate": 5.701559020044544e-05, |
|
"loss": 0.0002, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"grad_norm": 8.673022270202637, |
|
"learning_rate": 5.6570155902004463e-05, |
|
"loss": 0.0058, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"grad_norm": 0.0006804656004533172, |
|
"learning_rate": 5.6124721603563476e-05, |
|
"loss": 0.0002, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"grad_norm": 0.0013651803601533175, |
|
"learning_rate": 5.5679287305122496e-05, |
|
"loss": 0.0007, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"grad_norm": 0.0014620161382481456, |
|
"learning_rate": 5.5233853006681516e-05, |
|
"loss": 0.0002, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"grad_norm": 0.0009020831785164773, |
|
"learning_rate": 5.478841870824054e-05, |
|
"loss": 0.0003, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"grad_norm": 0.0010328377829864621, |
|
"learning_rate": 5.4342984409799555e-05, |
|
"loss": 0.0016, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"grad_norm": 0.002698230091482401, |
|
"learning_rate": 5.3897550111358575e-05, |
|
"loss": 0.0002, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"grad_norm": 0.0025662758853286505, |
|
"learning_rate": 5.34521158129176e-05, |
|
"loss": 0.0002, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"grad_norm": 0.0005825618281960487, |
|
"learning_rate": 5.300668151447662e-05, |
|
"loss": 0.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_accuracy": 0.9637254901960784, |
|
"eval_f1": 0.9637254901960784, |
|
"eval_loss": 0.17346832156181335, |
|
"eval_precision": 0.9637254901960784, |
|
"eval_recall": 0.9637254901960784, |
|
"eval_runtime": 17.0715, |
|
"eval_samples_per_second": 59.749, |
|
"eval_steps_per_second": 7.498, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 0.001066404627636075, |
|
"learning_rate": 5.2561247216035634e-05, |
|
"loss": 0.0001, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"grad_norm": 0.0007172970799729228, |
|
"learning_rate": 5.2115812917594654e-05, |
|
"loss": 0.0002, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"grad_norm": 0.000634915370028466, |
|
"learning_rate": 5.167037861915368e-05, |
|
"loss": 0.0001, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"grad_norm": 0.004406619351357222, |
|
"learning_rate": 5.12249443207127e-05, |
|
"loss": 0.0001, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"grad_norm": 0.015614562667906284, |
|
"learning_rate": 5.077951002227171e-05, |
|
"loss": 0.0002, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"grad_norm": 0.0017906671855598688, |
|
"learning_rate": 5.033407572383074e-05, |
|
"loss": 0.0003, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"grad_norm": 0.002051855204626918, |
|
"learning_rate": 4.988864142538976e-05, |
|
"loss": 0.0003, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"grad_norm": 0.0007296734838746488, |
|
"learning_rate": 4.944320712694878e-05, |
|
"loss": 0.0001, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"grad_norm": 0.0005030659376643598, |
|
"learning_rate": 4.89977728285078e-05, |
|
"loss": 0.001, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"grad_norm": 0.00040412909584119916, |
|
"learning_rate": 4.855233853006682e-05, |
|
"loss": 0.0001, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_accuracy": 0.9686274509803922, |
|
"eval_f1": 0.9686274509803922, |
|
"eval_loss": 0.16111387312412262, |
|
"eval_precision": 0.9686274509803922, |
|
"eval_recall": 0.9686274509803922, |
|
"eval_runtime": 16.979, |
|
"eval_samples_per_second": 60.074, |
|
"eval_steps_per_second": 7.539, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"grad_norm": 0.0005902125267311931, |
|
"learning_rate": 4.810690423162584e-05, |
|
"loss": 0.0002, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"grad_norm": 0.0005297433235682547, |
|
"learning_rate": 4.766146993318486e-05, |
|
"loss": 0.0002, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"grad_norm": 0.0005097580142319202, |
|
"learning_rate": 4.721603563474388e-05, |
|
"loss": 0.0001, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"grad_norm": 0.0006824088632129133, |
|
"learning_rate": 4.67706013363029e-05, |
|
"loss": 0.0002, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"grad_norm": 0.0007693713996559381, |
|
"learning_rate": 4.632516703786192e-05, |
|
"loss": 0.0002, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"grad_norm": 0.0010686744935810566, |
|
"learning_rate": 4.5879732739420936e-05, |
|
"loss": 0.0001, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"grad_norm": 0.0008867672295309603, |
|
"learning_rate": 4.5434298440979956e-05, |
|
"loss": 0.0001, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"grad_norm": 0.029097959399223328, |
|
"learning_rate": 4.4988864142538976e-05, |
|
"loss": 0.0002, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"grad_norm": 1.0295618772506714, |
|
"learning_rate": 4.4543429844098e-05, |
|
"loss": 0.0232, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"grad_norm": 0.01833498664200306, |
|
"learning_rate": 4.4097995545657015e-05, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_accuracy": 0.9676470588235294, |
|
"eval_f1": 0.9676470588235294, |
|
"eval_loss": 0.15838229656219482, |
|
"eval_precision": 0.9676470588235294, |
|
"eval_recall": 0.9676470588235294, |
|
"eval_runtime": 17.1614, |
|
"eval_samples_per_second": 59.436, |
|
"eval_steps_per_second": 7.459, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"grad_norm": 0.00047049217391759157, |
|
"learning_rate": 4.365256124721604e-05, |
|
"loss": 0.0002, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 0.0005326452082954347, |
|
"learning_rate": 4.3207126948775055e-05, |
|
"loss": 0.0001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"grad_norm": 0.004704196471720934, |
|
"learning_rate": 4.276169265033408e-05, |
|
"loss": 0.0001, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"grad_norm": 0.0015603323699906468, |
|
"learning_rate": 4.2316258351893094e-05, |
|
"loss": 0.0001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"grad_norm": 0.001259263837710023, |
|
"learning_rate": 4.187082405345212e-05, |
|
"loss": 0.0002, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"grad_norm": 0.0009968471713364124, |
|
"learning_rate": 4.142538975501114e-05, |
|
"loss": 0.0173, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 0.0011363897938281298, |
|
"learning_rate": 4.097995545657016e-05, |
|
"loss": 0.0002, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"grad_norm": 0.000970890570897609, |
|
"learning_rate": 4.053452115812918e-05, |
|
"loss": 0.0002, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0004184432327747345, |
|
"learning_rate": 4.00890868596882e-05, |
|
"loss": 0.0002, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0016488181427121162, |
|
"learning_rate": 3.964365256124722e-05, |
|
"loss": 0.0001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15909050405025482, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 17.2363, |
|
"eval_samples_per_second": 59.178, |
|
"eval_steps_per_second": 7.426, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0007161149405874312, |
|
"learning_rate": 3.919821826280624e-05, |
|
"loss": 0.0001, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.025815103203058243, |
|
"learning_rate": 3.875278396436526e-05, |
|
"loss": 0.0002, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.6380942463874817, |
|
"learning_rate": 3.830734966592428e-05, |
|
"loss": 0.0067, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"grad_norm": 0.003612744389101863, |
|
"learning_rate": 3.78619153674833e-05, |
|
"loss": 0.0004, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"grad_norm": 0.002143553225323558, |
|
"learning_rate": 3.741648106904232e-05, |
|
"loss": 0.0002, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"grad_norm": 0.001735298428684473, |
|
"learning_rate": 3.697104677060134e-05, |
|
"loss": 0.0002, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"grad_norm": 0.00038884536479599774, |
|
"learning_rate": 3.652561247216036e-05, |
|
"loss": 0.0004, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"grad_norm": 0.0004352598334662616, |
|
"learning_rate": 3.608017817371938e-05, |
|
"loss": 0.0001, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"grad_norm": 0.0030426643788814545, |
|
"learning_rate": 3.5634743875278396e-05, |
|
"loss": 0.0002, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"grad_norm": 0.0004016205493826419, |
|
"learning_rate": 3.518930957683742e-05, |
|
"loss": 0.0005, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"eval_accuracy": 0.9705882352941176, |
|
"eval_f1": 0.9705882352941176, |
|
"eval_loss": 0.15964852273464203, |
|
"eval_precision": 0.9705882352941176, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 17.4233, |
|
"eval_samples_per_second": 58.542, |
|
"eval_steps_per_second": 7.346, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"grad_norm": 0.0010756178526207805, |
|
"learning_rate": 3.4743875278396436e-05, |
|
"loss": 0.0001, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"grad_norm": 0.002286019967868924, |
|
"learning_rate": 3.429844097995546e-05, |
|
"loss": 0.0001, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 0.0019246740266680717, |
|
"learning_rate": 3.3853006681514475e-05, |
|
"loss": 0.0001, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"grad_norm": 0.0022740724962204695, |
|
"learning_rate": 3.34075723830735e-05, |
|
"loss": 0.0001, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"grad_norm": 0.0003697601496241987, |
|
"learning_rate": 3.2962138084632515e-05, |
|
"loss": 0.0001, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"grad_norm": 0.00033845697180368006, |
|
"learning_rate": 3.251670378619154e-05, |
|
"loss": 0.0001, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.0004189737082924694, |
|
"learning_rate": 3.207126948775056e-05, |
|
"loss": 0.0001, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"grad_norm": 0.0008992131915874779, |
|
"learning_rate": 3.162583518930958e-05, |
|
"loss": 0.0004, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"grad_norm": 0.0008794433670118451, |
|
"learning_rate": 3.11804008908686e-05, |
|
"loss": 0.0001, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"grad_norm": 0.0017194098327308893, |
|
"learning_rate": 3.073496659242761e-05, |
|
"loss": 0.0002, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15634377300739288, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 17.0753, |
|
"eval_samples_per_second": 59.735, |
|
"eval_steps_per_second": 7.496, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"grad_norm": 0.0009110970422625542, |
|
"learning_rate": 3.028953229398664e-05, |
|
"loss": 0.0002, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"grad_norm": 0.0010168278822675347, |
|
"learning_rate": 2.9844097995545663e-05, |
|
"loss": 0.0001, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"grad_norm": 0.0005708567332476377, |
|
"learning_rate": 2.939866369710468e-05, |
|
"loss": 0.0001, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"grad_norm": 0.0033207752276211977, |
|
"learning_rate": 2.89532293986637e-05, |
|
"loss": 0.0001, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"grad_norm": 0.0010962020605802536, |
|
"learning_rate": 2.850779510022272e-05, |
|
"loss": 0.0001, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"grad_norm": 0.0008160584839060903, |
|
"learning_rate": 2.8062360801781738e-05, |
|
"loss": 0.0001, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"grad_norm": 0.001714337500743568, |
|
"learning_rate": 2.7616926503340758e-05, |
|
"loss": 0.0001, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"grad_norm": 0.002063535852357745, |
|
"learning_rate": 2.7171492204899778e-05, |
|
"loss": 0.0001, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"grad_norm": 0.0010866498341783881, |
|
"learning_rate": 2.67260579064588e-05, |
|
"loss": 0.0001, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"grad_norm": 0.0009104039054363966, |
|
"learning_rate": 2.6280623608017817e-05, |
|
"loss": 0.0002, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15503399074077606, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 17.3241, |
|
"eval_samples_per_second": 58.877, |
|
"eval_steps_per_second": 7.389, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"grad_norm": 0.0029284367337822914, |
|
"learning_rate": 2.583518930957684e-05, |
|
"loss": 0.007, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"grad_norm": 0.0005087658646516502, |
|
"learning_rate": 2.5389755011135856e-05, |
|
"loss": 0.0001, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 0.0005650007515214384, |
|
"learning_rate": 2.494432071269488e-05, |
|
"loss": 0.0001, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"grad_norm": 0.0005321349017322063, |
|
"learning_rate": 2.44988864142539e-05, |
|
"loss": 0.0001, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 0.0008177491254173219, |
|
"learning_rate": 2.405345211581292e-05, |
|
"loss": 0.0001, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"grad_norm": 0.0005406069685705006, |
|
"learning_rate": 2.360801781737194e-05, |
|
"loss": 0.0001, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"grad_norm": 0.000659614393953234, |
|
"learning_rate": 2.316258351893096e-05, |
|
"loss": 0.0001, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"grad_norm": 0.0004996512434445322, |
|
"learning_rate": 2.2717149220489978e-05, |
|
"loss": 0.0001, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"grad_norm": 0.0002974416420329362, |
|
"learning_rate": 2.2271714922049e-05, |
|
"loss": 0.0001, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"grad_norm": 0.0011179678840562701, |
|
"learning_rate": 2.182628062360802e-05, |
|
"loss": 0.0001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"eval_accuracy": 0.9705882352941176, |
|
"eval_f1": 0.9705882352941176, |
|
"eval_loss": 0.15417079627513885, |
|
"eval_precision": 0.9705882352941176, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 17.0729, |
|
"eval_samples_per_second": 59.744, |
|
"eval_steps_per_second": 7.497, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"grad_norm": 0.0005180141888558865, |
|
"learning_rate": 2.138084632516704e-05, |
|
"loss": 0.0001, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"grad_norm": 0.0005326379905454814, |
|
"learning_rate": 2.093541202672606e-05, |
|
"loss": 0.0001, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"grad_norm": 0.0006433409289456904, |
|
"learning_rate": 2.048997772828508e-05, |
|
"loss": 0.0001, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.002777345711365342, |
|
"learning_rate": 2.00445434298441e-05, |
|
"loss": 0.0001, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0007074729655869305, |
|
"learning_rate": 1.959910913140312e-05, |
|
"loss": 0.0001, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.001536020776256919, |
|
"learning_rate": 1.915367483296214e-05, |
|
"loss": 0.0001, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0009424517047591507, |
|
"learning_rate": 1.870824053452116e-05, |
|
"loss": 0.0001, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.3284554183483124, |
|
"learning_rate": 1.826280623608018e-05, |
|
"loss": 0.0026, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"grad_norm": 0.00030903713195584714, |
|
"learning_rate": 1.7817371937639198e-05, |
|
"loss": 0.0001, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"grad_norm": 0.0005440358072519302, |
|
"learning_rate": 1.7371937639198218e-05, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15382429957389832, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 16.821, |
|
"eval_samples_per_second": 60.638, |
|
"eval_steps_per_second": 7.61, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"grad_norm": 0.0006746925064362586, |
|
"learning_rate": 1.6926503340757238e-05, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"grad_norm": 0.00037791216163896024, |
|
"learning_rate": 1.6481069042316257e-05, |
|
"loss": 0.0001, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 0.0012918213615193963, |
|
"learning_rate": 1.603563474387528e-05, |
|
"loss": 0.0001, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": 0.000723692704923451, |
|
"learning_rate": 1.55902004454343e-05, |
|
"loss": 0.0001, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"grad_norm": 0.0006746066501364112, |
|
"learning_rate": 1.514476614699332e-05, |
|
"loss": 0.0001, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"grad_norm": 0.000450183724751696, |
|
"learning_rate": 1.469933184855234e-05, |
|
"loss": 0.0001, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"grad_norm": 0.0011862111277878284, |
|
"learning_rate": 1.425389755011136e-05, |
|
"loss": 0.0001, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"grad_norm": 0.0017749534454196692, |
|
"learning_rate": 1.3808463251670379e-05, |
|
"loss": 0.0001, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"grad_norm": 0.0003237236524000764, |
|
"learning_rate": 1.33630289532294e-05, |
|
"loss": 0.0001, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"grad_norm": 0.0004740317235700786, |
|
"learning_rate": 1.291759465478842e-05, |
|
"loss": 0.0001, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15357248485088348, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 17.097, |
|
"eval_samples_per_second": 59.659, |
|
"eval_steps_per_second": 7.487, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"grad_norm": 0.0004047084948979318, |
|
"learning_rate": 1.247216035634744e-05, |
|
"loss": 0.0001, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"grad_norm": 0.0028567886911332607, |
|
"learning_rate": 1.202672605790646e-05, |
|
"loss": 0.0001, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"grad_norm": 0.0005680415779352188, |
|
"learning_rate": 1.158129175946548e-05, |
|
"loss": 0.0001, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"grad_norm": 0.00213377526961267, |
|
"learning_rate": 1.11358574610245e-05, |
|
"loss": 0.0001, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"grad_norm": 0.001148115610703826, |
|
"learning_rate": 1.069042316258352e-05, |
|
"loss": 0.0001, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"grad_norm": 0.00045941799180582166, |
|
"learning_rate": 1.024498886414254e-05, |
|
"loss": 0.0001, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"grad_norm": 0.0024906108155846596, |
|
"learning_rate": 9.79955456570156e-06, |
|
"loss": 0.0001, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"grad_norm": 0.0004669167974498123, |
|
"learning_rate": 9.35412026726058e-06, |
|
"loss": 0.0001, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"grad_norm": 0.0003813539515249431, |
|
"learning_rate": 8.908685968819599e-06, |
|
"loss": 0.0001, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"grad_norm": 0.00048394210170954466, |
|
"learning_rate": 8.463251670378619e-06, |
|
"loss": 0.0001, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15336920320987701, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 16.9378, |
|
"eval_samples_per_second": 60.22, |
|
"eval_steps_per_second": 7.557, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 0.001094264443963766, |
|
"learning_rate": 8.01781737193764e-06, |
|
"loss": 0.0001, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"grad_norm": 0.0007720951689407229, |
|
"learning_rate": 7.57238307349666e-06, |
|
"loss": 0.0001, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"grad_norm": 0.0010363436304032803, |
|
"learning_rate": 7.12694877505568e-06, |
|
"loss": 0.0001, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"grad_norm": 0.0006155350711196661, |
|
"learning_rate": 6.6815144766147e-06, |
|
"loss": 0.0001, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"grad_norm": 0.002117099007591605, |
|
"learning_rate": 6.23608017817372e-06, |
|
"loss": 0.0001, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"grad_norm": 0.00035223804297856987, |
|
"learning_rate": 5.79064587973274e-06, |
|
"loss": 0.0001, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"grad_norm": 0.0008616923005320132, |
|
"learning_rate": 5.34521158129176e-06, |
|
"loss": 0.0001, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"grad_norm": 0.0003698187065310776, |
|
"learning_rate": 4.89977728285078e-06, |
|
"loss": 0.0001, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"grad_norm": 0.0008464885177090764, |
|
"learning_rate": 4.4543429844097995e-06, |
|
"loss": 0.0001, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"grad_norm": 0.000521197565831244, |
|
"learning_rate": 4.00890868596882e-06, |
|
"loss": 0.0001, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_accuracy": 0.9715686274509804, |
|
"eval_f1": 0.9715686274509804, |
|
"eval_loss": 0.15329474210739136, |
|
"eval_precision": 0.9715686274509804, |
|
"eval_recall": 0.9715686274509804, |
|
"eval_runtime": 17.0026, |
|
"eval_samples_per_second": 59.991, |
|
"eval_steps_per_second": 7.528, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 0.00078478833893314, |
|
"learning_rate": 3.56347438752784e-06, |
|
"loss": 0.0001, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"grad_norm": 0.00044067302951589227, |
|
"learning_rate": 3.11804008908686e-06, |
|
"loss": 0.0001, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"grad_norm": 0.0005106424796395004, |
|
"learning_rate": 2.67260579064588e-06, |
|
"loss": 0.0001, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"grad_norm": 0.0003234909090679139, |
|
"learning_rate": 2.2271714922048998e-06, |
|
"loss": 0.0001, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"grad_norm": 0.0003544181527104229, |
|
"learning_rate": 1.78173719376392e-06, |
|
"loss": 0.0001, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"grad_norm": 0.00040398509008809924, |
|
"learning_rate": 1.33630289532294e-06, |
|
"loss": 0.0001, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"grad_norm": 0.0009952255059033632, |
|
"learning_rate": 8.9086859688196e-07, |
|
"loss": 0.0001, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"grad_norm": 0.0003712301841005683, |
|
"learning_rate": 4.4543429844098e-07, |
|
"loss": 0.0001, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0012604963267222047, |
|
"learning_rate": 0.0, |
|
"loss": 0.0099, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 4490, |
|
"total_flos": 5.562769847811564e+18, |
|
"train_loss": 0.025327244219600944, |
|
"train_runtime": 3846.586, |
|
"train_samples_per_second": 18.637, |
|
"train_steps_per_second": 1.167 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4490, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 5.562769847811564e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|