{ "best_metric": 0.7164179104477612, "best_model_checkpoint": "videomae-base-finetuned-good-gesturePhaseV5/checkpoint-490", "epoch": 19.036231884057973, "eval_steps": 500, "global_step": 1380, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007246376811594203, "grad_norm": 10.268256187438965, "learning_rate": 7.246376811594204e-07, "loss": 1.7861, "step": 10 }, { "epoch": 0.014492753623188406, "grad_norm": 9.569662094116211, "learning_rate": 1.4492753623188408e-06, "loss": 1.7539, "step": 20 }, { "epoch": 0.021739130434782608, "grad_norm": 11.259232521057129, "learning_rate": 2.173913043478261e-06, "loss": 1.7245, "step": 30 }, { "epoch": 0.028985507246376812, "grad_norm": 7.182712554931641, "learning_rate": 2.8985507246376816e-06, "loss": 1.5819, "step": 40 }, { "epoch": 0.036231884057971016, "grad_norm": 7.182448387145996, "learning_rate": 3.6231884057971017e-06, "loss": 1.4508, "step": 50 }, { "epoch": 0.043478260869565216, "grad_norm": 6.293506622314453, "learning_rate": 4.347826086956522e-06, "loss": 1.2736, "step": 60 }, { "epoch": 0.050724637681159424, "grad_norm": 6.1872944831848145, "learning_rate": 5.072463768115943e-06, "loss": 1.1475, "step": 70 }, { "epoch": 0.050724637681159424, "eval_accuracy": 0.5597014925373134, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 1.0, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.0, "eval_loss": 1.2558053731918335, "eval_runtime": 12.049, "eval_samples_per_second": 11.121, "eval_steps_per_second": 1.411, "step": 70 }, { "epoch": 1.0072463768115942, "grad_norm": 6.2471137046813965, "learning_rate": 5.797101449275363e-06, "loss": 1.1874, "step": 80 }, { "epoch": 1.0144927536231885, "grad_norm": 5.99643611907959, "learning_rate": 6.521739130434783e-06, "loss": 1.1562, "step": 90 }, { "epoch": 1.0217391304347827, "grad_norm": 5.748469829559326, "learning_rate": 7.246376811594203e-06, "loss": 1.1299, "step": 100 }, { "epoch": 1.0289855072463767, "grad_norm": 4.690842628479004, "learning_rate": 7.971014492753623e-06, "loss": 1.0645, "step": 110 }, { "epoch": 1.036231884057971, "grad_norm": 4.702245712280273, "learning_rate": 8.695652173913044e-06, "loss": 1.3726, "step": 120 }, { "epoch": 1.0434782608695652, "grad_norm": 4.664677143096924, "learning_rate": 9.420289855072464e-06, "loss": 0.9989, "step": 130 }, { "epoch": 1.0507246376811594, "grad_norm": 6.247674465179443, "learning_rate": 9.98389694041868e-06, "loss": 1.2103, "step": 140 }, { "epoch": 1.0507246376811594, "eval_accuracy": 0.5597014925373134, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 1.0, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.0, "eval_loss": 1.2703653573989868, "eval_runtime": 10.5504, "eval_samples_per_second": 12.701, "eval_steps_per_second": 1.611, "step": 140 }, { "epoch": 2.0072463768115942, "grad_norm": 6.113107681274414, "learning_rate": 9.903381642512077e-06, "loss": 0.9008, "step": 150 }, { "epoch": 2.0144927536231885, "grad_norm": 6.213434219360352, "learning_rate": 9.822866344605476e-06, "loss": 1.3296, "step": 160 }, { "epoch": 2.0217391304347827, "grad_norm": 5.969342231750488, "learning_rate": 9.742351046698874e-06, "loss": 1.2484, "step": 170 }, { "epoch": 2.028985507246377, "grad_norm": 7.0032196044921875, "learning_rate": 9.66183574879227e-06, "loss": 1.1239, "step": 180 }, { "epoch": 2.036231884057971, "grad_norm": 5.675483703613281, "learning_rate": 9.581320450885669e-06, "loss": 1.1508, "step": 190 }, { "epoch": 2.0434782608695654, "grad_norm": 5.311893939971924, "learning_rate": 9.500805152979067e-06, "loss": 1.1376, "step": 200 }, { "epoch": 2.050724637681159, "grad_norm": 6.503353118896484, "learning_rate": 9.420289855072464e-06, "loss": 0.9964, "step": 210 }, { "epoch": 2.050724637681159, "eval_accuracy": 0.5597014925373134, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 1.0, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.0, "eval_loss": 1.2141785621643066, "eval_runtime": 10.6183, "eval_samples_per_second": 12.62, "eval_steps_per_second": 1.601, "step": 210 }, { "epoch": 3.0072463768115942, "grad_norm": 4.335814476013184, "learning_rate": 9.339774557165862e-06, "loss": 1.059, "step": 220 }, { "epoch": 3.0144927536231885, "grad_norm": 6.591854095458984, "learning_rate": 9.25925925925926e-06, "loss": 1.149, "step": 230 }, { "epoch": 3.0217391304347827, "grad_norm": 6.057372570037842, "learning_rate": 9.178743961352658e-06, "loss": 0.9137, "step": 240 }, { "epoch": 3.028985507246377, "grad_norm": 18.623048782348633, "learning_rate": 9.098228663446056e-06, "loss": 1.1594, "step": 250 }, { "epoch": 3.036231884057971, "grad_norm": 8.108168601989746, "learning_rate": 9.017713365539453e-06, "loss": 1.1682, "step": 260 }, { "epoch": 3.0434782608695654, "grad_norm": 8.030190467834473, "learning_rate": 8.937198067632851e-06, "loss": 1.2166, "step": 270 }, { "epoch": 3.050724637681159, "grad_norm": 9.22143268585205, "learning_rate": 8.85668276972625e-06, "loss": 0.9975, "step": 280 }, { "epoch": 3.050724637681159, "eval_accuracy": 0.5970149253731343, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.9733333333333334, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.2692307692307692, "eval_loss": 1.0746699571609497, "eval_runtime": 10.8059, "eval_samples_per_second": 12.401, "eval_steps_per_second": 1.573, "step": 280 }, { "epoch": 4.007246376811594, "grad_norm": 9.18785285949707, "learning_rate": 8.776167471819646e-06, "loss": 1.1711, "step": 290 }, { "epoch": 4.0144927536231885, "grad_norm": 13.558119773864746, "learning_rate": 8.695652173913044e-06, "loss": 0.8541, "step": 300 }, { "epoch": 4.021739130434782, "grad_norm": 25.042736053466797, "learning_rate": 8.615136876006443e-06, "loss": 0.9376, "step": 310 }, { "epoch": 4.028985507246377, "grad_norm": 11.34363079071045, "learning_rate": 8.53462157809984e-06, "loss": 1.0186, "step": 320 }, { "epoch": 4.036231884057971, "grad_norm": 10.07709789276123, "learning_rate": 8.454106280193238e-06, "loss": 1.0577, "step": 330 }, { "epoch": 4.043478260869565, "grad_norm": 10.198676109313965, "learning_rate": 8.373590982286636e-06, "loss": 1.0924, "step": 340 }, { "epoch": 4.050724637681159, "grad_norm": 19.099363327026367, "learning_rate": 8.293075684380033e-06, "loss": 1.0538, "step": 350 }, { "epoch": 4.050724637681159, "eval_accuracy": 0.664179104477612, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.88, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.8846153846153846, "eval_loss": 0.9621986150741577, "eval_runtime": 10.8298, "eval_samples_per_second": 12.373, "eval_steps_per_second": 1.57, "step": 350 }, { "epoch": 5.007246376811594, "grad_norm": 4.384646892547607, "learning_rate": 8.212560386473431e-06, "loss": 0.9923, "step": 360 }, { "epoch": 5.0144927536231885, "grad_norm": 16.15943717956543, "learning_rate": 8.132045088566828e-06, "loss": 0.937, "step": 370 }, { "epoch": 5.021739130434782, "grad_norm": 23.731212615966797, "learning_rate": 8.051529790660226e-06, "loss": 1.0034, "step": 380 }, { "epoch": 5.028985507246377, "grad_norm": 9.929147720336914, "learning_rate": 7.971014492753623e-06, "loss": 0.9414, "step": 390 }, { "epoch": 5.036231884057971, "grad_norm": 7.188724517822266, "learning_rate": 7.890499194847021e-06, "loss": 0.9633, "step": 400 }, { "epoch": 5.043478260869565, "grad_norm": 7.971716403961182, "learning_rate": 7.80998389694042e-06, "loss": 0.9313, "step": 410 }, { "epoch": 5.050724637681159, "grad_norm": 23.818294525146484, "learning_rate": 7.729468599033817e-06, "loss": 1.0321, "step": 420 }, { "epoch": 5.050724637681159, "eval_accuracy": 0.6567164179104478, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8533333333333334, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.9230769230769231, "eval_loss": 0.9450957775115967, "eval_runtime": 10.5545, "eval_samples_per_second": 12.696, "eval_steps_per_second": 1.611, "step": 420 }, { "epoch": 6.007246376811594, "grad_norm": 6.358923435211182, "learning_rate": 7.648953301127215e-06, "loss": 0.7638, "step": 430 }, { "epoch": 6.0144927536231885, "grad_norm": 7.487329483032227, "learning_rate": 7.568438003220613e-06, "loss": 0.8216, "step": 440 }, { "epoch": 6.021739130434782, "grad_norm": 8.15117359161377, "learning_rate": 7.48792270531401e-06, "loss": 1.042, "step": 450 }, { "epoch": 6.028985507246377, "grad_norm": 8.114563941955566, "learning_rate": 7.4074074074074075e-06, "loss": 0.8528, "step": 460 }, { "epoch": 6.036231884057971, "grad_norm": 10.497995376586914, "learning_rate": 7.326892109500806e-06, "loss": 1.0731, "step": 470 }, { "epoch": 6.043478260869565, "grad_norm": 7.0596818923950195, "learning_rate": 7.246376811594203e-06, "loss": 0.89, "step": 480 }, { "epoch": 6.050724637681159, "grad_norm": 7.714406490325928, "learning_rate": 7.165861513687601e-06, "loss": 0.7822, "step": 490 }, { "epoch": 6.050724637681159, "eval_accuracy": 0.7164179104477612, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.96, "eval_accuracy_recovery": 0.08333333333333333, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.8846153846153846, "eval_loss": 0.8797218203544617, "eval_runtime": 10.7094, "eval_samples_per_second": 12.512, "eval_steps_per_second": 1.587, "step": 490 }, { "epoch": 7.007246376811594, "grad_norm": 6.505290508270264, "learning_rate": 7.085346215780999e-06, "loss": 0.7396, "step": 500 }, { "epoch": 7.0144927536231885, "grad_norm": 7.676311492919922, "learning_rate": 7.004830917874397e-06, "loss": 1.0383, "step": 510 }, { "epoch": 7.021739130434782, "grad_norm": 8.093998908996582, "learning_rate": 6.924315619967794e-06, "loss": 0.841, "step": 520 }, { "epoch": 7.028985507246377, "grad_norm": 4.555324077606201, "learning_rate": 6.843800322061193e-06, "loss": 0.6253, "step": 530 }, { "epoch": 7.036231884057971, "grad_norm": 6.43816614151001, "learning_rate": 6.76328502415459e-06, "loss": 0.9515, "step": 540 }, { "epoch": 7.043478260869565, "grad_norm": 10.472518920898438, "learning_rate": 6.682769726247987e-06, "loss": 1.1703, "step": 550 }, { "epoch": 7.050724637681159, "grad_norm": 12.832620620727539, "learning_rate": 6.602254428341386e-06, "loss": 0.8743, "step": 560 }, { "epoch": 7.050724637681159, "eval_accuracy": 0.6791044776119403, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8533333333333334, "eval_accuracy_recovery": 0.08333333333333333, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.9399013519287109, "eval_runtime": 10.6086, "eval_samples_per_second": 12.631, "eval_steps_per_second": 1.602, "step": 560 }, { "epoch": 8.007246376811594, "grad_norm": 6.019155502319336, "learning_rate": 6.521739130434783e-06, "loss": 0.8912, "step": 570 }, { "epoch": 8.014492753623188, "grad_norm": 5.795660018920898, "learning_rate": 6.44122383252818e-06, "loss": 0.8103, "step": 580 }, { "epoch": 8.021739130434783, "grad_norm": 22.207584381103516, "learning_rate": 6.360708534621579e-06, "loss": 0.9291, "step": 590 }, { "epoch": 8.028985507246377, "grad_norm": 8.41496467590332, "learning_rate": 6.280193236714976e-06, "loss": 0.8249, "step": 600 }, { "epoch": 8.03623188405797, "grad_norm": 19.649620056152344, "learning_rate": 6.199677938808374e-06, "loss": 0.9209, "step": 610 }, { "epoch": 8.043478260869565, "grad_norm": 33.21004867553711, "learning_rate": 6.119162640901772e-06, "loss": 0.7371, "step": 620 }, { "epoch": 8.05072463768116, "grad_norm": 10.654520988464355, "learning_rate": 6.03864734299517e-06, "loss": 0.7515, "step": 630 }, { "epoch": 8.05072463768116, "eval_accuracy": 0.6791044776119403, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8666666666666667, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.928985059261322, "eval_runtime": 11.5822, "eval_samples_per_second": 11.57, "eval_steps_per_second": 1.468, "step": 630 }, { "epoch": 9.007246376811594, "grad_norm": 9.228910446166992, "learning_rate": 5.958132045088567e-06, "loss": 0.8266, "step": 640 }, { "epoch": 9.014492753623188, "grad_norm": 6.276782035827637, "learning_rate": 5.877616747181965e-06, "loss": 0.611, "step": 650 }, { "epoch": 9.021739130434783, "grad_norm": 27.36185646057129, "learning_rate": 5.797101449275363e-06, "loss": 0.8595, "step": 660 }, { "epoch": 9.028985507246377, "grad_norm": 13.051177024841309, "learning_rate": 5.716586151368761e-06, "loss": 1.0073, "step": 670 }, { "epoch": 9.03623188405797, "grad_norm": 12.05443000793457, "learning_rate": 5.6360708534621574e-06, "loss": 0.6386, "step": 680 }, { "epoch": 9.043478260869565, "grad_norm": 9.477375984191895, "learning_rate": 5.555555555555557e-06, "loss": 1.1109, "step": 690 }, { "epoch": 9.05072463768116, "grad_norm": 16.6785945892334, "learning_rate": 5.475040257648953e-06, "loss": 0.8525, "step": 700 }, { "epoch": 9.05072463768116, "eval_accuracy": 0.7089552238805971, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.9466666666666667, "eval_accuracy_recovery": 0.16666666666666666, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.8461538461538461, "eval_loss": 0.8446803092956543, "eval_runtime": 15.7965, "eval_samples_per_second": 8.483, "eval_steps_per_second": 1.076, "step": 700 }, { "epoch": 10.007246376811594, "grad_norm": 12.451231002807617, "learning_rate": 5.394524959742351e-06, "loss": 0.7396, "step": 710 }, { "epoch": 10.014492753623188, "grad_norm": 6.226474761962891, "learning_rate": 5.314009661835749e-06, "loss": 0.7467, "step": 720 }, { "epoch": 10.021739130434783, "grad_norm": 9.991294860839844, "learning_rate": 5.233494363929147e-06, "loss": 0.8203, "step": 730 }, { "epoch": 10.028985507246377, "grad_norm": 18.472450256347656, "learning_rate": 5.152979066022544e-06, "loss": 0.766, "step": 740 }, { "epoch": 10.03623188405797, "grad_norm": 8.452324867248535, "learning_rate": 5.072463768115943e-06, "loss": 0.8539, "step": 750 }, { "epoch": 10.043478260869565, "grad_norm": 18.313405990600586, "learning_rate": 4.99194847020934e-06, "loss": 0.7443, "step": 760 }, { "epoch": 10.05072463768116, "grad_norm": 18.442798614501953, "learning_rate": 4.911433172302738e-06, "loss": 0.7661, "step": 770 }, { "epoch": 10.05072463768116, "eval_accuracy": 0.7089552238805971, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.9066666666666666, "eval_accuracy_recovery": 0.16666666666666666, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.9615384615384616, "eval_loss": 0.7856659293174744, "eval_runtime": 10.3807, "eval_samples_per_second": 12.909, "eval_steps_per_second": 1.638, "step": 770 }, { "epoch": 11.007246376811594, "grad_norm": 8.811756134033203, "learning_rate": 4.830917874396135e-06, "loss": 0.7784, "step": 780 }, { "epoch": 11.014492753623188, "grad_norm": 8.056985855102539, "learning_rate": 4.750402576489534e-06, "loss": 0.7112, "step": 790 }, { "epoch": 11.021739130434783, "grad_norm": 5.6117939949035645, "learning_rate": 4.669887278582931e-06, "loss": 0.6901, "step": 800 }, { "epoch": 11.028985507246377, "grad_norm": 8.331842422485352, "learning_rate": 4.589371980676329e-06, "loss": 0.5963, "step": 810 }, { "epoch": 11.03623188405797, "grad_norm": 7.48464822769165, "learning_rate": 4.508856682769726e-06, "loss": 0.9118, "step": 820 }, { "epoch": 11.043478260869565, "grad_norm": 14.966755867004395, "learning_rate": 4.428341384863125e-06, "loss": 0.8462, "step": 830 }, { "epoch": 11.05072463768116, "grad_norm": 13.086145401000977, "learning_rate": 4.347826086956522e-06, "loss": 0.8363, "step": 840 }, { "epoch": 11.05072463768116, "eval_accuracy": 0.6865671641791045, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.92, "eval_accuracy_recovery": 0.08333333333333333, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.8461538461538461, "eval_loss": 0.8165404200553894, "eval_runtime": 10.6177, "eval_samples_per_second": 12.62, "eval_steps_per_second": 1.601, "step": 840 }, { "epoch": 12.007246376811594, "grad_norm": 13.03532886505127, "learning_rate": 4.26731078904992e-06, "loss": 0.8312, "step": 850 }, { "epoch": 12.014492753623188, "grad_norm": 10.994771957397461, "learning_rate": 4.186795491143318e-06, "loss": 0.7242, "step": 860 }, { "epoch": 12.021739130434783, "grad_norm": 37.28443908691406, "learning_rate": 4.106280193236716e-06, "loss": 0.8542, "step": 870 }, { "epoch": 12.028985507246377, "grad_norm": 15.367025375366211, "learning_rate": 4.025764895330113e-06, "loss": 0.7253, "step": 880 }, { "epoch": 12.03623188405797, "grad_norm": 18.963815689086914, "learning_rate": 3.945249597423511e-06, "loss": 0.6786, "step": 890 }, { "epoch": 12.043478260869565, "grad_norm": 6.912456512451172, "learning_rate": 3.864734299516908e-06, "loss": 0.6771, "step": 900 }, { "epoch": 12.05072463768116, "grad_norm": 11.315908432006836, "learning_rate": 3.7842190016103066e-06, "loss": 0.659, "step": 910 }, { "epoch": 12.05072463768116, "eval_accuracy": 0.7164179104477612, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.9066666666666666, "eval_accuracy_recovery": 0.16666666666666666, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.7950567007064819, "eval_runtime": 10.5, "eval_samples_per_second": 12.762, "eval_steps_per_second": 1.619, "step": 910 }, { "epoch": 13.007246376811594, "grad_norm": 14.643826484680176, "learning_rate": 3.7037037037037037e-06, "loss": 0.7824, "step": 920 }, { "epoch": 13.014492753623188, "grad_norm": 10.328850746154785, "learning_rate": 3.6231884057971017e-06, "loss": 0.7485, "step": 930 }, { "epoch": 13.021739130434783, "grad_norm": 13.339781761169434, "learning_rate": 3.5426731078904997e-06, "loss": 0.6207, "step": 940 }, { "epoch": 13.028985507246377, "grad_norm": 15.779186248779297, "learning_rate": 3.462157809983897e-06, "loss": 0.7358, "step": 950 }, { "epoch": 13.03623188405797, "grad_norm": 9.903864860534668, "learning_rate": 3.381642512077295e-06, "loss": 0.7412, "step": 960 }, { "epoch": 13.043478260869565, "grad_norm": 19.546676635742188, "learning_rate": 3.301127214170693e-06, "loss": 0.6234, "step": 970 }, { "epoch": 13.05072463768116, "grad_norm": 11.30717945098877, "learning_rate": 3.22061191626409e-06, "loss": 0.6274, "step": 980 }, { "epoch": 13.05072463768116, "eval_accuracy": 0.7014925373134329, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8933333333333333, "eval_accuracy_recovery": 0.08333333333333333, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.7753632068634033, "eval_runtime": 10.5138, "eval_samples_per_second": 12.745, "eval_steps_per_second": 1.617, "step": 980 }, { "epoch": 14.007246376811594, "grad_norm": 14.222391128540039, "learning_rate": 3.140096618357488e-06, "loss": 0.6564, "step": 990 }, { "epoch": 14.014492753623188, "grad_norm": 9.9340238571167, "learning_rate": 3.059581320450886e-06, "loss": 0.6258, "step": 1000 }, { "epoch": 14.021739130434783, "grad_norm": 21.6451416015625, "learning_rate": 2.9790660225442837e-06, "loss": 0.7534, "step": 1010 }, { "epoch": 14.028985507246377, "grad_norm": 5.2904462814331055, "learning_rate": 2.8985507246376816e-06, "loss": 0.6742, "step": 1020 }, { "epoch": 14.03623188405797, "grad_norm": 12.198628425598145, "learning_rate": 2.8180354267310787e-06, "loss": 0.6938, "step": 1030 }, { "epoch": 14.043478260869565, "grad_norm": 16.807910919189453, "learning_rate": 2.7375201288244767e-06, "loss": 0.6308, "step": 1040 }, { "epoch": 14.05072463768116, "grad_norm": 29.11658477783203, "learning_rate": 2.6570048309178746e-06, "loss": 0.7292, "step": 1050 }, { "epoch": 14.05072463768116, "eval_accuracy": 0.6791044776119403, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8266666666666667, "eval_accuracy_recovery": 0.25, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.8128386735916138, "eval_runtime": 10.6524, "eval_samples_per_second": 12.579, "eval_steps_per_second": 1.596, "step": 1050 }, { "epoch": 15.007246376811594, "grad_norm": 19.81916046142578, "learning_rate": 2.576489533011272e-06, "loss": 0.6481, "step": 1060 }, { "epoch": 15.014492753623188, "grad_norm": 23.879323959350586, "learning_rate": 2.49597423510467e-06, "loss": 0.6273, "step": 1070 }, { "epoch": 15.021739130434783, "grad_norm": 26.635896682739258, "learning_rate": 2.4154589371980677e-06, "loss": 0.629, "step": 1080 }, { "epoch": 15.028985507246377, "grad_norm": 15.82916259765625, "learning_rate": 2.3349436392914656e-06, "loss": 0.6142, "step": 1090 }, { "epoch": 15.03623188405797, "grad_norm": 29.63594627380371, "learning_rate": 2.254428341384863e-06, "loss": 0.6727, "step": 1100 }, { "epoch": 15.043478260869565, "grad_norm": 15.376564979553223, "learning_rate": 2.173913043478261e-06, "loss": 0.6719, "step": 1110 }, { "epoch": 15.05072463768116, "grad_norm": 19.15326499938965, "learning_rate": 2.093397745571659e-06, "loss": 0.7447, "step": 1120 }, { "epoch": 15.05072463768116, "eval_accuracy": 0.6865671641791045, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.84, "eval_accuracy_recovery": 0.25, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.7860263586044312, "eval_runtime": 10.5021, "eval_samples_per_second": 12.759, "eval_steps_per_second": 1.619, "step": 1120 }, { "epoch": 16.007246376811594, "grad_norm": 12.135031700134277, "learning_rate": 2.0128824476650566e-06, "loss": 0.6189, "step": 1130 }, { "epoch": 16.014492753623188, "grad_norm": 13.124465942382812, "learning_rate": 1.932367149758454e-06, "loss": 0.5972, "step": 1140 }, { "epoch": 16.02173913043478, "grad_norm": 15.266533851623535, "learning_rate": 1.8518518518518519e-06, "loss": 0.8582, "step": 1150 }, { "epoch": 16.028985507246375, "grad_norm": 15.358236312866211, "learning_rate": 1.7713365539452498e-06, "loss": 0.5844, "step": 1160 }, { "epoch": 16.036231884057973, "grad_norm": 28.783262252807617, "learning_rate": 1.6908212560386476e-06, "loss": 0.6861, "step": 1170 }, { "epoch": 16.043478260869566, "grad_norm": 26.525894165039062, "learning_rate": 1.610305958132045e-06, "loss": 0.5341, "step": 1180 }, { "epoch": 16.05072463768116, "grad_norm": 20.598716735839844, "learning_rate": 1.529790660225443e-06, "loss": 0.5512, "step": 1190 }, { "epoch": 16.05072463768116, "eval_accuracy": 0.7014925373134329, "eval_accuracy_hold": 0.0625, "eval_accuracy_preparation": 0.8666666666666667, "eval_accuracy_recovery": 0.16666666666666666, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.7839486598968506, "eval_runtime": 10.6658, "eval_samples_per_second": 12.564, "eval_steps_per_second": 1.594, "step": 1190 }, { "epoch": 17.007246376811594, "grad_norm": 15.926338195800781, "learning_rate": 1.4492753623188408e-06, "loss": 0.6122, "step": 1200 }, { "epoch": 17.014492753623188, "grad_norm": 9.888009071350098, "learning_rate": 1.3687600644122383e-06, "loss": 0.6741, "step": 1210 }, { "epoch": 17.02173913043478, "grad_norm": 7.94158935546875, "learning_rate": 1.288244766505636e-06, "loss": 0.5769, "step": 1220 }, { "epoch": 17.028985507246375, "grad_norm": 12.030184745788574, "learning_rate": 1.2077294685990338e-06, "loss": 0.7706, "step": 1230 }, { "epoch": 17.036231884057973, "grad_norm": 12.405618667602539, "learning_rate": 1.1272141706924316e-06, "loss": 0.7073, "step": 1240 }, { "epoch": 17.043478260869566, "grad_norm": 18.144861221313477, "learning_rate": 1.0466988727858295e-06, "loss": 0.5714, "step": 1250 }, { "epoch": 17.05072463768116, "grad_norm": 51.77016067504883, "learning_rate": 9.66183574879227e-07, "loss": 0.3404, "step": 1260 }, { "epoch": 17.05072463768116, "eval_accuracy": 0.7014925373134329, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8533333333333334, "eval_accuracy_recovery": 0.3333333333333333, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.8054906725883484, "eval_runtime": 10.759, "eval_samples_per_second": 12.455, "eval_steps_per_second": 1.58, "step": 1260 }, { "epoch": 18.007246376811594, "grad_norm": 22.187854766845703, "learning_rate": 8.856682769726249e-07, "loss": 0.6113, "step": 1270 }, { "epoch": 18.014492753623188, "grad_norm": 16.749889373779297, "learning_rate": 8.051529790660226e-07, "loss": 0.6058, "step": 1280 }, { "epoch": 18.02173913043478, "grad_norm": 16.847583770751953, "learning_rate": 7.246376811594204e-07, "loss": 0.7215, "step": 1290 }, { "epoch": 18.028985507246375, "grad_norm": 71.80619049072266, "learning_rate": 6.44122383252818e-07, "loss": 0.483, "step": 1300 }, { "epoch": 18.036231884057973, "grad_norm": 13.316573143005371, "learning_rate": 5.636070853462158e-07, "loss": 0.585, "step": 1310 }, { "epoch": 18.043478260869566, "grad_norm": 22.565717697143555, "learning_rate": 4.830917874396135e-07, "loss": 0.5584, "step": 1320 }, { "epoch": 18.05072463768116, "grad_norm": 9.562509536743164, "learning_rate": 4.025764895330113e-07, "loss": 0.4406, "step": 1330 }, { "epoch": 18.05072463768116, "eval_accuracy": 0.6865671641791045, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.8533333333333334, "eval_accuracy_recovery": 0.16666666666666666, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.7800428867340088, "eval_runtime": 10.5812, "eval_samples_per_second": 12.664, "eval_steps_per_second": 1.607, "step": 1330 }, { "epoch": 19.007246376811594, "grad_norm": 14.316792488098145, "learning_rate": 3.22061191626409e-07, "loss": 0.5768, "step": 1340 }, { "epoch": 19.014492753623188, "grad_norm": 12.971110343933105, "learning_rate": 2.4154589371980677e-07, "loss": 0.5324, "step": 1350 }, { "epoch": 19.02173913043478, "grad_norm": 18.0727596282959, "learning_rate": 1.610305958132045e-07, "loss": 0.4406, "step": 1360 }, { "epoch": 19.028985507246375, "grad_norm": 20.428890228271484, "learning_rate": 8.051529790660226e-08, "loss": 0.5222, "step": 1370 }, { "epoch": 19.036231884057973, "grad_norm": 28.665119171142578, "learning_rate": 0.0, "loss": 0.6358, "step": 1380 }, { "epoch": 19.036231884057973, "eval_accuracy": 0.7014925373134329, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.88, "eval_accuracy_recovery": 0.16666666666666666, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 1.0, "eval_loss": 0.7816389203071594, "eval_runtime": 10.5317, "eval_samples_per_second": 12.724, "eval_steps_per_second": 1.614, "step": 1380 }, { "epoch": 19.036231884057973, "step": 1380, "total_flos": 1.3709569395152978e+19, "train_loss": 0.8566418366155762, "train_runtime": 2187.3823, "train_samples_per_second": 5.047, "train_steps_per_second": 0.631 }, { "epoch": 19.036231884057973, "eval_accuracy": 0.6870748299319728, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.922077922077922, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.8571428571428571, "eval_loss": 0.9822249412536621, "eval_runtime": 13.5675, "eval_samples_per_second": 10.835, "eval_steps_per_second": 1.4, "step": 1380 }, { "epoch": 19.036231884057973, "eval_accuracy": 0.6870748299319728, "eval_accuracy_hold": 0.0, "eval_accuracy_preparation": 0.922077922077922, "eval_accuracy_recovery": 0.0, "eval_accuracy_stroke": 0.0, "eval_accuracy_unknown": 0.8571428571428571, "eval_loss": 0.9822250604629517, "eval_runtime": 12.2253, "eval_samples_per_second": 12.024, "eval_steps_per_second": 1.554, "step": 1380 } ], "logging_steps": 10, "max_steps": 1380, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3709569395152978e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }