alexgrigore's picture
End of training
318e9f7 verified
raw
history blame contribute delete
No virus
35 kB
{
"best_metric": 0.7164179104477612,
"best_model_checkpoint": "videomae-base-finetuned-good-gesturePhaseV5/checkpoint-490",
"epoch": 19.036231884057973,
"eval_steps": 500,
"global_step": 1380,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007246376811594203,
"grad_norm": 10.268256187438965,
"learning_rate": 7.246376811594204e-07,
"loss": 1.7861,
"step": 10
},
{
"epoch": 0.014492753623188406,
"grad_norm": 9.569662094116211,
"learning_rate": 1.4492753623188408e-06,
"loss": 1.7539,
"step": 20
},
{
"epoch": 0.021739130434782608,
"grad_norm": 11.259232521057129,
"learning_rate": 2.173913043478261e-06,
"loss": 1.7245,
"step": 30
},
{
"epoch": 0.028985507246376812,
"grad_norm": 7.182712554931641,
"learning_rate": 2.8985507246376816e-06,
"loss": 1.5819,
"step": 40
},
{
"epoch": 0.036231884057971016,
"grad_norm": 7.182448387145996,
"learning_rate": 3.6231884057971017e-06,
"loss": 1.4508,
"step": 50
},
{
"epoch": 0.043478260869565216,
"grad_norm": 6.293506622314453,
"learning_rate": 4.347826086956522e-06,
"loss": 1.2736,
"step": 60
},
{
"epoch": 0.050724637681159424,
"grad_norm": 6.1872944831848145,
"learning_rate": 5.072463768115943e-06,
"loss": 1.1475,
"step": 70
},
{
"epoch": 0.050724637681159424,
"eval_accuracy": 0.5597014925373134,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 1.0,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.0,
"eval_loss": 1.2558053731918335,
"eval_runtime": 12.049,
"eval_samples_per_second": 11.121,
"eval_steps_per_second": 1.411,
"step": 70
},
{
"epoch": 1.0072463768115942,
"grad_norm": 6.2471137046813965,
"learning_rate": 5.797101449275363e-06,
"loss": 1.1874,
"step": 80
},
{
"epoch": 1.0144927536231885,
"grad_norm": 5.99643611907959,
"learning_rate": 6.521739130434783e-06,
"loss": 1.1562,
"step": 90
},
{
"epoch": 1.0217391304347827,
"grad_norm": 5.748469829559326,
"learning_rate": 7.246376811594203e-06,
"loss": 1.1299,
"step": 100
},
{
"epoch": 1.0289855072463767,
"grad_norm": 4.690842628479004,
"learning_rate": 7.971014492753623e-06,
"loss": 1.0645,
"step": 110
},
{
"epoch": 1.036231884057971,
"grad_norm": 4.702245712280273,
"learning_rate": 8.695652173913044e-06,
"loss": 1.3726,
"step": 120
},
{
"epoch": 1.0434782608695652,
"grad_norm": 4.664677143096924,
"learning_rate": 9.420289855072464e-06,
"loss": 0.9989,
"step": 130
},
{
"epoch": 1.0507246376811594,
"grad_norm": 6.247674465179443,
"learning_rate": 9.98389694041868e-06,
"loss": 1.2103,
"step": 140
},
{
"epoch": 1.0507246376811594,
"eval_accuracy": 0.5597014925373134,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 1.0,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.0,
"eval_loss": 1.2703653573989868,
"eval_runtime": 10.5504,
"eval_samples_per_second": 12.701,
"eval_steps_per_second": 1.611,
"step": 140
},
{
"epoch": 2.0072463768115942,
"grad_norm": 6.113107681274414,
"learning_rate": 9.903381642512077e-06,
"loss": 0.9008,
"step": 150
},
{
"epoch": 2.0144927536231885,
"grad_norm": 6.213434219360352,
"learning_rate": 9.822866344605476e-06,
"loss": 1.3296,
"step": 160
},
{
"epoch": 2.0217391304347827,
"grad_norm": 5.969342231750488,
"learning_rate": 9.742351046698874e-06,
"loss": 1.2484,
"step": 170
},
{
"epoch": 2.028985507246377,
"grad_norm": 7.0032196044921875,
"learning_rate": 9.66183574879227e-06,
"loss": 1.1239,
"step": 180
},
{
"epoch": 2.036231884057971,
"grad_norm": 5.675483703613281,
"learning_rate": 9.581320450885669e-06,
"loss": 1.1508,
"step": 190
},
{
"epoch": 2.0434782608695654,
"grad_norm": 5.311893939971924,
"learning_rate": 9.500805152979067e-06,
"loss": 1.1376,
"step": 200
},
{
"epoch": 2.050724637681159,
"grad_norm": 6.503353118896484,
"learning_rate": 9.420289855072464e-06,
"loss": 0.9964,
"step": 210
},
{
"epoch": 2.050724637681159,
"eval_accuracy": 0.5597014925373134,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 1.0,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.0,
"eval_loss": 1.2141785621643066,
"eval_runtime": 10.6183,
"eval_samples_per_second": 12.62,
"eval_steps_per_second": 1.601,
"step": 210
},
{
"epoch": 3.0072463768115942,
"grad_norm": 4.335814476013184,
"learning_rate": 9.339774557165862e-06,
"loss": 1.059,
"step": 220
},
{
"epoch": 3.0144927536231885,
"grad_norm": 6.591854095458984,
"learning_rate": 9.25925925925926e-06,
"loss": 1.149,
"step": 230
},
{
"epoch": 3.0217391304347827,
"grad_norm": 6.057372570037842,
"learning_rate": 9.178743961352658e-06,
"loss": 0.9137,
"step": 240
},
{
"epoch": 3.028985507246377,
"grad_norm": 18.623048782348633,
"learning_rate": 9.098228663446056e-06,
"loss": 1.1594,
"step": 250
},
{
"epoch": 3.036231884057971,
"grad_norm": 8.108168601989746,
"learning_rate": 9.017713365539453e-06,
"loss": 1.1682,
"step": 260
},
{
"epoch": 3.0434782608695654,
"grad_norm": 8.030190467834473,
"learning_rate": 8.937198067632851e-06,
"loss": 1.2166,
"step": 270
},
{
"epoch": 3.050724637681159,
"grad_norm": 9.22143268585205,
"learning_rate": 8.85668276972625e-06,
"loss": 0.9975,
"step": 280
},
{
"epoch": 3.050724637681159,
"eval_accuracy": 0.5970149253731343,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.9733333333333334,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.2692307692307692,
"eval_loss": 1.0746699571609497,
"eval_runtime": 10.8059,
"eval_samples_per_second": 12.401,
"eval_steps_per_second": 1.573,
"step": 280
},
{
"epoch": 4.007246376811594,
"grad_norm": 9.18785285949707,
"learning_rate": 8.776167471819646e-06,
"loss": 1.1711,
"step": 290
},
{
"epoch": 4.0144927536231885,
"grad_norm": 13.558119773864746,
"learning_rate": 8.695652173913044e-06,
"loss": 0.8541,
"step": 300
},
{
"epoch": 4.021739130434782,
"grad_norm": 25.042736053466797,
"learning_rate": 8.615136876006443e-06,
"loss": 0.9376,
"step": 310
},
{
"epoch": 4.028985507246377,
"grad_norm": 11.34363079071045,
"learning_rate": 8.53462157809984e-06,
"loss": 1.0186,
"step": 320
},
{
"epoch": 4.036231884057971,
"grad_norm": 10.07709789276123,
"learning_rate": 8.454106280193238e-06,
"loss": 1.0577,
"step": 330
},
{
"epoch": 4.043478260869565,
"grad_norm": 10.198676109313965,
"learning_rate": 8.373590982286636e-06,
"loss": 1.0924,
"step": 340
},
{
"epoch": 4.050724637681159,
"grad_norm": 19.099363327026367,
"learning_rate": 8.293075684380033e-06,
"loss": 1.0538,
"step": 350
},
{
"epoch": 4.050724637681159,
"eval_accuracy": 0.664179104477612,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.88,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.8846153846153846,
"eval_loss": 0.9621986150741577,
"eval_runtime": 10.8298,
"eval_samples_per_second": 12.373,
"eval_steps_per_second": 1.57,
"step": 350
},
{
"epoch": 5.007246376811594,
"grad_norm": 4.384646892547607,
"learning_rate": 8.212560386473431e-06,
"loss": 0.9923,
"step": 360
},
{
"epoch": 5.0144927536231885,
"grad_norm": 16.15943717956543,
"learning_rate": 8.132045088566828e-06,
"loss": 0.937,
"step": 370
},
{
"epoch": 5.021739130434782,
"grad_norm": 23.731212615966797,
"learning_rate": 8.051529790660226e-06,
"loss": 1.0034,
"step": 380
},
{
"epoch": 5.028985507246377,
"grad_norm": 9.929147720336914,
"learning_rate": 7.971014492753623e-06,
"loss": 0.9414,
"step": 390
},
{
"epoch": 5.036231884057971,
"grad_norm": 7.188724517822266,
"learning_rate": 7.890499194847021e-06,
"loss": 0.9633,
"step": 400
},
{
"epoch": 5.043478260869565,
"grad_norm": 7.971716403961182,
"learning_rate": 7.80998389694042e-06,
"loss": 0.9313,
"step": 410
},
{
"epoch": 5.050724637681159,
"grad_norm": 23.818294525146484,
"learning_rate": 7.729468599033817e-06,
"loss": 1.0321,
"step": 420
},
{
"epoch": 5.050724637681159,
"eval_accuracy": 0.6567164179104478,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8533333333333334,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.9230769230769231,
"eval_loss": 0.9450957775115967,
"eval_runtime": 10.5545,
"eval_samples_per_second": 12.696,
"eval_steps_per_second": 1.611,
"step": 420
},
{
"epoch": 6.007246376811594,
"grad_norm": 6.358923435211182,
"learning_rate": 7.648953301127215e-06,
"loss": 0.7638,
"step": 430
},
{
"epoch": 6.0144927536231885,
"grad_norm": 7.487329483032227,
"learning_rate": 7.568438003220613e-06,
"loss": 0.8216,
"step": 440
},
{
"epoch": 6.021739130434782,
"grad_norm": 8.15117359161377,
"learning_rate": 7.48792270531401e-06,
"loss": 1.042,
"step": 450
},
{
"epoch": 6.028985507246377,
"grad_norm": 8.114563941955566,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.8528,
"step": 460
},
{
"epoch": 6.036231884057971,
"grad_norm": 10.497995376586914,
"learning_rate": 7.326892109500806e-06,
"loss": 1.0731,
"step": 470
},
{
"epoch": 6.043478260869565,
"grad_norm": 7.0596818923950195,
"learning_rate": 7.246376811594203e-06,
"loss": 0.89,
"step": 480
},
{
"epoch": 6.050724637681159,
"grad_norm": 7.714406490325928,
"learning_rate": 7.165861513687601e-06,
"loss": 0.7822,
"step": 490
},
{
"epoch": 6.050724637681159,
"eval_accuracy": 0.7164179104477612,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.96,
"eval_accuracy_recovery": 0.08333333333333333,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.8846153846153846,
"eval_loss": 0.8797218203544617,
"eval_runtime": 10.7094,
"eval_samples_per_second": 12.512,
"eval_steps_per_second": 1.587,
"step": 490
},
{
"epoch": 7.007246376811594,
"grad_norm": 6.505290508270264,
"learning_rate": 7.085346215780999e-06,
"loss": 0.7396,
"step": 500
},
{
"epoch": 7.0144927536231885,
"grad_norm": 7.676311492919922,
"learning_rate": 7.004830917874397e-06,
"loss": 1.0383,
"step": 510
},
{
"epoch": 7.021739130434782,
"grad_norm": 8.093998908996582,
"learning_rate": 6.924315619967794e-06,
"loss": 0.841,
"step": 520
},
{
"epoch": 7.028985507246377,
"grad_norm": 4.555324077606201,
"learning_rate": 6.843800322061193e-06,
"loss": 0.6253,
"step": 530
},
{
"epoch": 7.036231884057971,
"grad_norm": 6.43816614151001,
"learning_rate": 6.76328502415459e-06,
"loss": 0.9515,
"step": 540
},
{
"epoch": 7.043478260869565,
"grad_norm": 10.472518920898438,
"learning_rate": 6.682769726247987e-06,
"loss": 1.1703,
"step": 550
},
{
"epoch": 7.050724637681159,
"grad_norm": 12.832620620727539,
"learning_rate": 6.602254428341386e-06,
"loss": 0.8743,
"step": 560
},
{
"epoch": 7.050724637681159,
"eval_accuracy": 0.6791044776119403,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8533333333333334,
"eval_accuracy_recovery": 0.08333333333333333,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.9399013519287109,
"eval_runtime": 10.6086,
"eval_samples_per_second": 12.631,
"eval_steps_per_second": 1.602,
"step": 560
},
{
"epoch": 8.007246376811594,
"grad_norm": 6.019155502319336,
"learning_rate": 6.521739130434783e-06,
"loss": 0.8912,
"step": 570
},
{
"epoch": 8.014492753623188,
"grad_norm": 5.795660018920898,
"learning_rate": 6.44122383252818e-06,
"loss": 0.8103,
"step": 580
},
{
"epoch": 8.021739130434783,
"grad_norm": 22.207584381103516,
"learning_rate": 6.360708534621579e-06,
"loss": 0.9291,
"step": 590
},
{
"epoch": 8.028985507246377,
"grad_norm": 8.41496467590332,
"learning_rate": 6.280193236714976e-06,
"loss": 0.8249,
"step": 600
},
{
"epoch": 8.03623188405797,
"grad_norm": 19.649620056152344,
"learning_rate": 6.199677938808374e-06,
"loss": 0.9209,
"step": 610
},
{
"epoch": 8.043478260869565,
"grad_norm": 33.21004867553711,
"learning_rate": 6.119162640901772e-06,
"loss": 0.7371,
"step": 620
},
{
"epoch": 8.05072463768116,
"grad_norm": 10.654520988464355,
"learning_rate": 6.03864734299517e-06,
"loss": 0.7515,
"step": 630
},
{
"epoch": 8.05072463768116,
"eval_accuracy": 0.6791044776119403,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8666666666666667,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.928985059261322,
"eval_runtime": 11.5822,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.468,
"step": 630
},
{
"epoch": 9.007246376811594,
"grad_norm": 9.228910446166992,
"learning_rate": 5.958132045088567e-06,
"loss": 0.8266,
"step": 640
},
{
"epoch": 9.014492753623188,
"grad_norm": 6.276782035827637,
"learning_rate": 5.877616747181965e-06,
"loss": 0.611,
"step": 650
},
{
"epoch": 9.021739130434783,
"grad_norm": 27.36185646057129,
"learning_rate": 5.797101449275363e-06,
"loss": 0.8595,
"step": 660
},
{
"epoch": 9.028985507246377,
"grad_norm": 13.051177024841309,
"learning_rate": 5.716586151368761e-06,
"loss": 1.0073,
"step": 670
},
{
"epoch": 9.03623188405797,
"grad_norm": 12.05443000793457,
"learning_rate": 5.6360708534621574e-06,
"loss": 0.6386,
"step": 680
},
{
"epoch": 9.043478260869565,
"grad_norm": 9.477375984191895,
"learning_rate": 5.555555555555557e-06,
"loss": 1.1109,
"step": 690
},
{
"epoch": 9.05072463768116,
"grad_norm": 16.6785945892334,
"learning_rate": 5.475040257648953e-06,
"loss": 0.8525,
"step": 700
},
{
"epoch": 9.05072463768116,
"eval_accuracy": 0.7089552238805971,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.9466666666666667,
"eval_accuracy_recovery": 0.16666666666666666,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.8461538461538461,
"eval_loss": 0.8446803092956543,
"eval_runtime": 15.7965,
"eval_samples_per_second": 8.483,
"eval_steps_per_second": 1.076,
"step": 700
},
{
"epoch": 10.007246376811594,
"grad_norm": 12.451231002807617,
"learning_rate": 5.394524959742351e-06,
"loss": 0.7396,
"step": 710
},
{
"epoch": 10.014492753623188,
"grad_norm": 6.226474761962891,
"learning_rate": 5.314009661835749e-06,
"loss": 0.7467,
"step": 720
},
{
"epoch": 10.021739130434783,
"grad_norm": 9.991294860839844,
"learning_rate": 5.233494363929147e-06,
"loss": 0.8203,
"step": 730
},
{
"epoch": 10.028985507246377,
"grad_norm": 18.472450256347656,
"learning_rate": 5.152979066022544e-06,
"loss": 0.766,
"step": 740
},
{
"epoch": 10.03623188405797,
"grad_norm": 8.452324867248535,
"learning_rate": 5.072463768115943e-06,
"loss": 0.8539,
"step": 750
},
{
"epoch": 10.043478260869565,
"grad_norm": 18.313405990600586,
"learning_rate": 4.99194847020934e-06,
"loss": 0.7443,
"step": 760
},
{
"epoch": 10.05072463768116,
"grad_norm": 18.442798614501953,
"learning_rate": 4.911433172302738e-06,
"loss": 0.7661,
"step": 770
},
{
"epoch": 10.05072463768116,
"eval_accuracy": 0.7089552238805971,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.9066666666666666,
"eval_accuracy_recovery": 0.16666666666666666,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.9615384615384616,
"eval_loss": 0.7856659293174744,
"eval_runtime": 10.3807,
"eval_samples_per_second": 12.909,
"eval_steps_per_second": 1.638,
"step": 770
},
{
"epoch": 11.007246376811594,
"grad_norm": 8.811756134033203,
"learning_rate": 4.830917874396135e-06,
"loss": 0.7784,
"step": 780
},
{
"epoch": 11.014492753623188,
"grad_norm": 8.056985855102539,
"learning_rate": 4.750402576489534e-06,
"loss": 0.7112,
"step": 790
},
{
"epoch": 11.021739130434783,
"grad_norm": 5.6117939949035645,
"learning_rate": 4.669887278582931e-06,
"loss": 0.6901,
"step": 800
},
{
"epoch": 11.028985507246377,
"grad_norm": 8.331842422485352,
"learning_rate": 4.589371980676329e-06,
"loss": 0.5963,
"step": 810
},
{
"epoch": 11.03623188405797,
"grad_norm": 7.48464822769165,
"learning_rate": 4.508856682769726e-06,
"loss": 0.9118,
"step": 820
},
{
"epoch": 11.043478260869565,
"grad_norm": 14.966755867004395,
"learning_rate": 4.428341384863125e-06,
"loss": 0.8462,
"step": 830
},
{
"epoch": 11.05072463768116,
"grad_norm": 13.086145401000977,
"learning_rate": 4.347826086956522e-06,
"loss": 0.8363,
"step": 840
},
{
"epoch": 11.05072463768116,
"eval_accuracy": 0.6865671641791045,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.92,
"eval_accuracy_recovery": 0.08333333333333333,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.8461538461538461,
"eval_loss": 0.8165404200553894,
"eval_runtime": 10.6177,
"eval_samples_per_second": 12.62,
"eval_steps_per_second": 1.601,
"step": 840
},
{
"epoch": 12.007246376811594,
"grad_norm": 13.03532886505127,
"learning_rate": 4.26731078904992e-06,
"loss": 0.8312,
"step": 850
},
{
"epoch": 12.014492753623188,
"grad_norm": 10.994771957397461,
"learning_rate": 4.186795491143318e-06,
"loss": 0.7242,
"step": 860
},
{
"epoch": 12.021739130434783,
"grad_norm": 37.28443908691406,
"learning_rate": 4.106280193236716e-06,
"loss": 0.8542,
"step": 870
},
{
"epoch": 12.028985507246377,
"grad_norm": 15.367025375366211,
"learning_rate": 4.025764895330113e-06,
"loss": 0.7253,
"step": 880
},
{
"epoch": 12.03623188405797,
"grad_norm": 18.963815689086914,
"learning_rate": 3.945249597423511e-06,
"loss": 0.6786,
"step": 890
},
{
"epoch": 12.043478260869565,
"grad_norm": 6.912456512451172,
"learning_rate": 3.864734299516908e-06,
"loss": 0.6771,
"step": 900
},
{
"epoch": 12.05072463768116,
"grad_norm": 11.315908432006836,
"learning_rate": 3.7842190016103066e-06,
"loss": 0.659,
"step": 910
},
{
"epoch": 12.05072463768116,
"eval_accuracy": 0.7164179104477612,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.9066666666666666,
"eval_accuracy_recovery": 0.16666666666666666,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.7950567007064819,
"eval_runtime": 10.5,
"eval_samples_per_second": 12.762,
"eval_steps_per_second": 1.619,
"step": 910
},
{
"epoch": 13.007246376811594,
"grad_norm": 14.643826484680176,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.7824,
"step": 920
},
{
"epoch": 13.014492753623188,
"grad_norm": 10.328850746154785,
"learning_rate": 3.6231884057971017e-06,
"loss": 0.7485,
"step": 930
},
{
"epoch": 13.021739130434783,
"grad_norm": 13.339781761169434,
"learning_rate": 3.5426731078904997e-06,
"loss": 0.6207,
"step": 940
},
{
"epoch": 13.028985507246377,
"grad_norm": 15.779186248779297,
"learning_rate": 3.462157809983897e-06,
"loss": 0.7358,
"step": 950
},
{
"epoch": 13.03623188405797,
"grad_norm": 9.903864860534668,
"learning_rate": 3.381642512077295e-06,
"loss": 0.7412,
"step": 960
},
{
"epoch": 13.043478260869565,
"grad_norm": 19.546676635742188,
"learning_rate": 3.301127214170693e-06,
"loss": 0.6234,
"step": 970
},
{
"epoch": 13.05072463768116,
"grad_norm": 11.30717945098877,
"learning_rate": 3.22061191626409e-06,
"loss": 0.6274,
"step": 980
},
{
"epoch": 13.05072463768116,
"eval_accuracy": 0.7014925373134329,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8933333333333333,
"eval_accuracy_recovery": 0.08333333333333333,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.7753632068634033,
"eval_runtime": 10.5138,
"eval_samples_per_second": 12.745,
"eval_steps_per_second": 1.617,
"step": 980
},
{
"epoch": 14.007246376811594,
"grad_norm": 14.222391128540039,
"learning_rate": 3.140096618357488e-06,
"loss": 0.6564,
"step": 990
},
{
"epoch": 14.014492753623188,
"grad_norm": 9.9340238571167,
"learning_rate": 3.059581320450886e-06,
"loss": 0.6258,
"step": 1000
},
{
"epoch": 14.021739130434783,
"grad_norm": 21.6451416015625,
"learning_rate": 2.9790660225442837e-06,
"loss": 0.7534,
"step": 1010
},
{
"epoch": 14.028985507246377,
"grad_norm": 5.2904462814331055,
"learning_rate": 2.8985507246376816e-06,
"loss": 0.6742,
"step": 1020
},
{
"epoch": 14.03623188405797,
"grad_norm": 12.198628425598145,
"learning_rate": 2.8180354267310787e-06,
"loss": 0.6938,
"step": 1030
},
{
"epoch": 14.043478260869565,
"grad_norm": 16.807910919189453,
"learning_rate": 2.7375201288244767e-06,
"loss": 0.6308,
"step": 1040
},
{
"epoch": 14.05072463768116,
"grad_norm": 29.11658477783203,
"learning_rate": 2.6570048309178746e-06,
"loss": 0.7292,
"step": 1050
},
{
"epoch": 14.05072463768116,
"eval_accuracy": 0.6791044776119403,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8266666666666667,
"eval_accuracy_recovery": 0.25,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.8128386735916138,
"eval_runtime": 10.6524,
"eval_samples_per_second": 12.579,
"eval_steps_per_second": 1.596,
"step": 1050
},
{
"epoch": 15.007246376811594,
"grad_norm": 19.81916046142578,
"learning_rate": 2.576489533011272e-06,
"loss": 0.6481,
"step": 1060
},
{
"epoch": 15.014492753623188,
"grad_norm": 23.879323959350586,
"learning_rate": 2.49597423510467e-06,
"loss": 0.6273,
"step": 1070
},
{
"epoch": 15.021739130434783,
"grad_norm": 26.635896682739258,
"learning_rate": 2.4154589371980677e-06,
"loss": 0.629,
"step": 1080
},
{
"epoch": 15.028985507246377,
"grad_norm": 15.82916259765625,
"learning_rate": 2.3349436392914656e-06,
"loss": 0.6142,
"step": 1090
},
{
"epoch": 15.03623188405797,
"grad_norm": 29.63594627380371,
"learning_rate": 2.254428341384863e-06,
"loss": 0.6727,
"step": 1100
},
{
"epoch": 15.043478260869565,
"grad_norm": 15.376564979553223,
"learning_rate": 2.173913043478261e-06,
"loss": 0.6719,
"step": 1110
},
{
"epoch": 15.05072463768116,
"grad_norm": 19.15326499938965,
"learning_rate": 2.093397745571659e-06,
"loss": 0.7447,
"step": 1120
},
{
"epoch": 15.05072463768116,
"eval_accuracy": 0.6865671641791045,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.84,
"eval_accuracy_recovery": 0.25,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.7860263586044312,
"eval_runtime": 10.5021,
"eval_samples_per_second": 12.759,
"eval_steps_per_second": 1.619,
"step": 1120
},
{
"epoch": 16.007246376811594,
"grad_norm": 12.135031700134277,
"learning_rate": 2.0128824476650566e-06,
"loss": 0.6189,
"step": 1130
},
{
"epoch": 16.014492753623188,
"grad_norm": 13.124465942382812,
"learning_rate": 1.932367149758454e-06,
"loss": 0.5972,
"step": 1140
},
{
"epoch": 16.02173913043478,
"grad_norm": 15.266533851623535,
"learning_rate": 1.8518518518518519e-06,
"loss": 0.8582,
"step": 1150
},
{
"epoch": 16.028985507246375,
"grad_norm": 15.358236312866211,
"learning_rate": 1.7713365539452498e-06,
"loss": 0.5844,
"step": 1160
},
{
"epoch": 16.036231884057973,
"grad_norm": 28.783262252807617,
"learning_rate": 1.6908212560386476e-06,
"loss": 0.6861,
"step": 1170
},
{
"epoch": 16.043478260869566,
"grad_norm": 26.525894165039062,
"learning_rate": 1.610305958132045e-06,
"loss": 0.5341,
"step": 1180
},
{
"epoch": 16.05072463768116,
"grad_norm": 20.598716735839844,
"learning_rate": 1.529790660225443e-06,
"loss": 0.5512,
"step": 1190
},
{
"epoch": 16.05072463768116,
"eval_accuracy": 0.7014925373134329,
"eval_accuracy_hold": 0.0625,
"eval_accuracy_preparation": 0.8666666666666667,
"eval_accuracy_recovery": 0.16666666666666666,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.7839486598968506,
"eval_runtime": 10.6658,
"eval_samples_per_second": 12.564,
"eval_steps_per_second": 1.594,
"step": 1190
},
{
"epoch": 17.007246376811594,
"grad_norm": 15.926338195800781,
"learning_rate": 1.4492753623188408e-06,
"loss": 0.6122,
"step": 1200
},
{
"epoch": 17.014492753623188,
"grad_norm": 9.888009071350098,
"learning_rate": 1.3687600644122383e-06,
"loss": 0.6741,
"step": 1210
},
{
"epoch": 17.02173913043478,
"grad_norm": 7.94158935546875,
"learning_rate": 1.288244766505636e-06,
"loss": 0.5769,
"step": 1220
},
{
"epoch": 17.028985507246375,
"grad_norm": 12.030184745788574,
"learning_rate": 1.2077294685990338e-06,
"loss": 0.7706,
"step": 1230
},
{
"epoch": 17.036231884057973,
"grad_norm": 12.405618667602539,
"learning_rate": 1.1272141706924316e-06,
"loss": 0.7073,
"step": 1240
},
{
"epoch": 17.043478260869566,
"grad_norm": 18.144861221313477,
"learning_rate": 1.0466988727858295e-06,
"loss": 0.5714,
"step": 1250
},
{
"epoch": 17.05072463768116,
"grad_norm": 51.77016067504883,
"learning_rate": 9.66183574879227e-07,
"loss": 0.3404,
"step": 1260
},
{
"epoch": 17.05072463768116,
"eval_accuracy": 0.7014925373134329,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8533333333333334,
"eval_accuracy_recovery": 0.3333333333333333,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.8054906725883484,
"eval_runtime": 10.759,
"eval_samples_per_second": 12.455,
"eval_steps_per_second": 1.58,
"step": 1260
},
{
"epoch": 18.007246376811594,
"grad_norm": 22.187854766845703,
"learning_rate": 8.856682769726249e-07,
"loss": 0.6113,
"step": 1270
},
{
"epoch": 18.014492753623188,
"grad_norm": 16.749889373779297,
"learning_rate": 8.051529790660226e-07,
"loss": 0.6058,
"step": 1280
},
{
"epoch": 18.02173913043478,
"grad_norm": 16.847583770751953,
"learning_rate": 7.246376811594204e-07,
"loss": 0.7215,
"step": 1290
},
{
"epoch": 18.028985507246375,
"grad_norm": 71.80619049072266,
"learning_rate": 6.44122383252818e-07,
"loss": 0.483,
"step": 1300
},
{
"epoch": 18.036231884057973,
"grad_norm": 13.316573143005371,
"learning_rate": 5.636070853462158e-07,
"loss": 0.585,
"step": 1310
},
{
"epoch": 18.043478260869566,
"grad_norm": 22.565717697143555,
"learning_rate": 4.830917874396135e-07,
"loss": 0.5584,
"step": 1320
},
{
"epoch": 18.05072463768116,
"grad_norm": 9.562509536743164,
"learning_rate": 4.025764895330113e-07,
"loss": 0.4406,
"step": 1330
},
{
"epoch": 18.05072463768116,
"eval_accuracy": 0.6865671641791045,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.8533333333333334,
"eval_accuracy_recovery": 0.16666666666666666,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.7800428867340088,
"eval_runtime": 10.5812,
"eval_samples_per_second": 12.664,
"eval_steps_per_second": 1.607,
"step": 1330
},
{
"epoch": 19.007246376811594,
"grad_norm": 14.316792488098145,
"learning_rate": 3.22061191626409e-07,
"loss": 0.5768,
"step": 1340
},
{
"epoch": 19.014492753623188,
"grad_norm": 12.971110343933105,
"learning_rate": 2.4154589371980677e-07,
"loss": 0.5324,
"step": 1350
},
{
"epoch": 19.02173913043478,
"grad_norm": 18.0727596282959,
"learning_rate": 1.610305958132045e-07,
"loss": 0.4406,
"step": 1360
},
{
"epoch": 19.028985507246375,
"grad_norm": 20.428890228271484,
"learning_rate": 8.051529790660226e-08,
"loss": 0.5222,
"step": 1370
},
{
"epoch": 19.036231884057973,
"grad_norm": 28.665119171142578,
"learning_rate": 0.0,
"loss": 0.6358,
"step": 1380
},
{
"epoch": 19.036231884057973,
"eval_accuracy": 0.7014925373134329,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.88,
"eval_accuracy_recovery": 0.16666666666666666,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 1.0,
"eval_loss": 0.7816389203071594,
"eval_runtime": 10.5317,
"eval_samples_per_second": 12.724,
"eval_steps_per_second": 1.614,
"step": 1380
},
{
"epoch": 19.036231884057973,
"step": 1380,
"total_flos": 1.3709569395152978e+19,
"train_loss": 0.8566418366155762,
"train_runtime": 2187.3823,
"train_samples_per_second": 5.047,
"train_steps_per_second": 0.631
},
{
"epoch": 19.036231884057973,
"eval_accuracy": 0.6870748299319728,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.922077922077922,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.8571428571428571,
"eval_loss": 0.9822249412536621,
"eval_runtime": 13.5675,
"eval_samples_per_second": 10.835,
"eval_steps_per_second": 1.4,
"step": 1380
},
{
"epoch": 19.036231884057973,
"eval_accuracy": 0.6870748299319728,
"eval_accuracy_hold": 0.0,
"eval_accuracy_preparation": 0.922077922077922,
"eval_accuracy_recovery": 0.0,
"eval_accuracy_stroke": 0.0,
"eval_accuracy_unknown": 0.8571428571428571,
"eval_loss": 0.9822250604629517,
"eval_runtime": 12.2253,
"eval_samples_per_second": 12.024,
"eval_steps_per_second": 1.554,
"step": 1380
}
],
"logging_steps": 10,
"max_steps": 1380,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3709569395152978e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}