|
{ |
|
"best_metric": 0.9737991266375546, |
|
"best_model_checkpoint": "videomae-base-finetuned-good-gesturePhaseV11/checkpoint-1260", |
|
"epoch": 9.092857142857143, |
|
"eval_steps": 500, |
|
"global_step": 1260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007936507936507936, |
|
"grad_norm": 10.67099666595459, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 1.6937, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015873015873015872, |
|
"grad_norm": 10.839055061340332, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 1.6052, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.023809523809523808, |
|
"grad_norm": 10.671605110168457, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 1.5128, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.031746031746031744, |
|
"grad_norm": 10.129815101623535, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 1.3375, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03968253968253968, |
|
"grad_norm": 7.233930587768555, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 1.1729, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 7.941068172454834, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 1.0557, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 5.641613483428955, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.8538, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06349206349206349, |
|
"grad_norm": 11.44336986541748, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.9715, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 6.217037200927734, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.9786, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07936507936507936, |
|
"grad_norm": 8.418871879577637, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.8612, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0873015873015873, |
|
"grad_norm": 8.026611328125, |
|
"learning_rate": 8.730158730158731e-06, |
|
"loss": 0.9033, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 5.777014255523682, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 1.2097, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1007936507936508, |
|
"eval_accuracy": 0.6899563318777293, |
|
"eval_accuracy_hold": 0.0, |
|
"eval_accuracy_preparation": 1.0, |
|
"eval_accuracy_recovery": 0.0, |
|
"eval_accuracy_stroke": 0.0, |
|
"eval_accuracy_unknown": 0.0, |
|
"eval_loss": 1.0243256092071533, |
|
"eval_runtime": 22.3266, |
|
"eval_samples_per_second": 10.257, |
|
"eval_steps_per_second": 1.299, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.0023809523809524, |
|
"grad_norm": 4.667547225952148, |
|
"learning_rate": 9.9647266313933e-06, |
|
"loss": 0.9759, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0103174603174603, |
|
"grad_norm": 6.570101261138916, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.8687, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0182539682539682, |
|
"grad_norm": 3.454606533050537, |
|
"learning_rate": 9.788359788359789e-06, |
|
"loss": 0.9626, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.026190476190476, |
|
"grad_norm": 5.504855632781982, |
|
"learning_rate": 9.700176366843034e-06, |
|
"loss": 0.8062, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0341269841269842, |
|
"grad_norm": 11.000463485717773, |
|
"learning_rate": 9.61199294532628e-06, |
|
"loss": 0.8868, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0420634920634921, |
|
"grad_norm": 9.436511039733887, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 0.7972, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.840676784515381, |
|
"learning_rate": 9.43562610229277e-06, |
|
"loss": 0.9835, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.057936507936508, |
|
"grad_norm": 5.491481781005859, |
|
"learning_rate": 9.347442680776014e-06, |
|
"loss": 0.8156, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0658730158730159, |
|
"grad_norm": 9.045355796813965, |
|
"learning_rate": 9.25925925925926e-06, |
|
"loss": 0.9068, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0738095238095238, |
|
"grad_norm": 6.567234516143799, |
|
"learning_rate": 9.171075837742504e-06, |
|
"loss": 0.9518, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0817460317460317, |
|
"grad_norm": 4.041014194488525, |
|
"learning_rate": 9.08289241622575e-06, |
|
"loss": 0.8425, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0896825396825396, |
|
"grad_norm": 5.872761249542236, |
|
"learning_rate": 8.994708994708995e-06, |
|
"loss": 0.7499, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0976190476190477, |
|
"grad_norm": 5.4285888671875, |
|
"learning_rate": 8.90652557319224e-06, |
|
"loss": 0.8, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1007936507936509, |
|
"eval_accuracy": 0.6899563318777293, |
|
"eval_accuracy_hold": 0.0, |
|
"eval_accuracy_preparation": 1.0, |
|
"eval_accuracy_recovery": 0.0, |
|
"eval_accuracy_stroke": 0.0, |
|
"eval_accuracy_unknown": 0.0, |
|
"eval_loss": 0.8716774582862854, |
|
"eval_runtime": 18.6999, |
|
"eval_samples_per_second": 12.246, |
|
"eval_steps_per_second": 1.551, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 2.0047619047619047, |
|
"grad_norm": 7.916933536529541, |
|
"learning_rate": 8.818342151675486e-06, |
|
"loss": 0.8939, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.0126984126984127, |
|
"grad_norm": 10.032241821289062, |
|
"learning_rate": 8.730158730158731e-06, |
|
"loss": 0.9507, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.0206349206349206, |
|
"grad_norm": 5.500053405761719, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.7524, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.0285714285714285, |
|
"grad_norm": 2.9224343299865723, |
|
"learning_rate": 8.553791887125222e-06, |
|
"loss": 0.8033, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0365079365079364, |
|
"grad_norm": 9.924591064453125, |
|
"learning_rate": 8.465608465608466e-06, |
|
"loss": 0.7143, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0444444444444443, |
|
"grad_norm": 6.131772518157959, |
|
"learning_rate": 8.377425044091711e-06, |
|
"loss": 0.8056, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.052380952380952, |
|
"grad_norm": 5.074908256530762, |
|
"learning_rate": 8.289241622574956e-06, |
|
"loss": 0.7329, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.06031746031746, |
|
"grad_norm": 6.0842061042785645, |
|
"learning_rate": 8.201058201058202e-06, |
|
"loss": 0.5516, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.0682539682539685, |
|
"grad_norm": 6.810462951660156, |
|
"learning_rate": 8.112874779541447e-06, |
|
"loss": 0.6635, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0761904761904764, |
|
"grad_norm": 8.190900802612305, |
|
"learning_rate": 8.024691358024692e-06, |
|
"loss": 0.707, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0841269841269843, |
|
"grad_norm": 6.851507663726807, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.8107, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.092063492063492, |
|
"grad_norm": 10.986032485961914, |
|
"learning_rate": 7.848324514991183e-06, |
|
"loss": 0.5181, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 7.873786926269531, |
|
"learning_rate": 7.760141093474427e-06, |
|
"loss": 0.5199, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.100793650793651, |
|
"eval_accuracy": 0.7729257641921398, |
|
"eval_accuracy_hold": 0.0, |
|
"eval_accuracy_preparation": 0.9810126582278481, |
|
"eval_accuracy_recovery": 0.17647058823529413, |
|
"eval_accuracy_stroke": 0.0, |
|
"eval_accuracy_unknown": 0.8636363636363636, |
|
"eval_loss": 0.6724938750267029, |
|
"eval_runtime": 17.1616, |
|
"eval_samples_per_second": 13.344, |
|
"eval_steps_per_second": 1.69, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 3.007142857142857, |
|
"grad_norm": 9.23725700378418, |
|
"learning_rate": 7.671957671957672e-06, |
|
"loss": 0.5796, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.015079365079365, |
|
"grad_norm": 14.487571716308594, |
|
"learning_rate": 7.583774250440918e-06, |
|
"loss": 0.6356, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.023015873015873, |
|
"grad_norm": 8.111766815185547, |
|
"learning_rate": 7.495590828924163e-06, |
|
"loss": 0.609, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.030952380952381, |
|
"grad_norm": 10.31395149230957, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.4853, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.0388888888888888, |
|
"grad_norm": 9.713062286376953, |
|
"learning_rate": 7.319223985890654e-06, |
|
"loss": 0.4254, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.0468253968253967, |
|
"grad_norm": 24.236248016357422, |
|
"learning_rate": 7.231040564373898e-06, |
|
"loss": 0.6144, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.0547619047619046, |
|
"grad_norm": 9.259011268615723, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.5036, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.0626984126984125, |
|
"grad_norm": 9.141166687011719, |
|
"learning_rate": 7.054673721340388e-06, |
|
"loss": 0.5061, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.070634920634921, |
|
"grad_norm": 1.9605706930160522, |
|
"learning_rate": 6.966490299823634e-06, |
|
"loss": 0.3654, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.0785714285714287, |
|
"grad_norm": 23.66200828552246, |
|
"learning_rate": 6.878306878306879e-06, |
|
"loss": 0.5331, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.0865079365079366, |
|
"grad_norm": 17.01007843017578, |
|
"learning_rate": 6.790123456790124e-06, |
|
"loss": 0.491, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.0944444444444446, |
|
"grad_norm": 18.177013397216797, |
|
"learning_rate": 6.701940035273369e-06, |
|
"loss": 0.3134, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.100793650793651, |
|
"eval_accuracy": 0.8427947598253275, |
|
"eval_accuracy_hold": 0.3076923076923077, |
|
"eval_accuracy_preparation": 1.0, |
|
"eval_accuracy_recovery": 0.4117647058823529, |
|
"eval_accuracy_stroke": 0.0, |
|
"eval_accuracy_unknown": 0.9090909090909091, |
|
"eval_loss": 0.47145798802375793, |
|
"eval_runtime": 20.9475, |
|
"eval_samples_per_second": 10.932, |
|
"eval_steps_per_second": 1.384, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 4.001587301587302, |
|
"grad_norm": 3.7459118366241455, |
|
"learning_rate": 6.613756613756615e-06, |
|
"loss": 0.2951, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.0095238095238095, |
|
"grad_norm": 1.2948265075683594, |
|
"learning_rate": 6.525573192239859e-06, |
|
"loss": 0.3015, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.017460317460317, |
|
"grad_norm": 28.894683837890625, |
|
"learning_rate": 6.437389770723105e-06, |
|
"loss": 0.5519, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.025396825396825, |
|
"grad_norm": 12.930984497070312, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.3167, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.033333333333333, |
|
"grad_norm": 14.674068450927734, |
|
"learning_rate": 6.2610229276895955e-06, |
|
"loss": 0.3852, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.041269841269841, |
|
"grad_norm": 5.561566352844238, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 0.2827, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.049206349206349, |
|
"grad_norm": 2.773526906967163, |
|
"learning_rate": 6.084656084656085e-06, |
|
"loss": 0.3025, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.057142857142857, |
|
"grad_norm": 2.659675121307373, |
|
"learning_rate": 5.99647266313933e-06, |
|
"loss": 0.2043, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.065079365079365, |
|
"grad_norm": 3.98606538772583, |
|
"learning_rate": 5.908289241622576e-06, |
|
"loss": 0.3236, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.073015873015873, |
|
"grad_norm": 27.466781616210938, |
|
"learning_rate": 5.820105820105821e-06, |
|
"loss": 0.1979, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.080952380952381, |
|
"grad_norm": 5.185789108276367, |
|
"learning_rate": 5.731922398589066e-06, |
|
"loss": 0.2881, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.088888888888889, |
|
"grad_norm": 11.606480598449707, |
|
"learning_rate": 5.6437389770723105e-06, |
|
"loss": 0.1399, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.0968253968253965, |
|
"grad_norm": 31.712146759033203, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.1561, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.10079365079365, |
|
"eval_accuracy": 0.8951965065502183, |
|
"eval_accuracy_hold": 0.7692307692307693, |
|
"eval_accuracy_preparation": 1.0, |
|
"eval_accuracy_recovery": 0.7058823529411765, |
|
"eval_accuracy_stroke": 0.0, |
|
"eval_accuracy_unknown": 0.6818181818181818, |
|
"eval_loss": 0.43630751967430115, |
|
"eval_runtime": 18.0386, |
|
"eval_samples_per_second": 12.695, |
|
"eval_steps_per_second": 1.608, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 5.003968253968254, |
|
"grad_norm": 0.8358302116394043, |
|
"learning_rate": 5.467372134038801e-06, |
|
"loss": 0.4914, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.011904761904762, |
|
"grad_norm": 26.23114585876465, |
|
"learning_rate": 5.3791887125220465e-06, |
|
"loss": 0.1426, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.01984126984127, |
|
"grad_norm": 21.137874603271484, |
|
"learning_rate": 5.291005291005291e-06, |
|
"loss": 0.3177, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.027777777777778, |
|
"grad_norm": 13.185637474060059, |
|
"learning_rate": 5.202821869488537e-06, |
|
"loss": 0.2641, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.035714285714286, |
|
"grad_norm": 22.404172897338867, |
|
"learning_rate": 5.114638447971782e-06, |
|
"loss": 0.122, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.0436507936507935, |
|
"grad_norm": 1.1821787357330322, |
|
"learning_rate": 5.026455026455027e-06, |
|
"loss": 0.214, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.051587301587301, |
|
"grad_norm": 0.8503882884979248, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.3925, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.059523809523809, |
|
"grad_norm": 26.318893432617188, |
|
"learning_rate": 4.850088183421517e-06, |
|
"loss": 0.1443, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 5.067460317460317, |
|
"grad_norm": 111.941162109375, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.1638, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.075396825396825, |
|
"grad_norm": 27.32931137084961, |
|
"learning_rate": 4.673721340388007e-06, |
|
"loss": 0.3608, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.083333333333333, |
|
"grad_norm": 117.25592041015625, |
|
"learning_rate": 4.585537918871252e-06, |
|
"loss": 0.0645, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 5.091269841269841, |
|
"grad_norm": 12.846317291259766, |
|
"learning_rate": 4.497354497354498e-06, |
|
"loss": 0.2111, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.099206349206349, |
|
"grad_norm": 45.810054779052734, |
|
"learning_rate": 4.409171075837743e-06, |
|
"loss": 0.0429, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.10079365079365, |
|
"eval_accuracy": 0.9432314410480349, |
|
"eval_accuracy_hold": 0.8846153846153846, |
|
"eval_accuracy_preparation": 0.9936708860759493, |
|
"eval_accuracy_recovery": 0.7058823529411765, |
|
"eval_accuracy_stroke": 0.5, |
|
"eval_accuracy_unknown": 0.9545454545454546, |
|
"eval_loss": 0.2210746556520462, |
|
"eval_runtime": 17.8804, |
|
"eval_samples_per_second": 12.807, |
|
"eval_steps_per_second": 1.622, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 6.006349206349206, |
|
"grad_norm": 35.88644027709961, |
|
"learning_rate": 4.3209876543209875e-06, |
|
"loss": 0.1086, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.014285714285714, |
|
"grad_norm": 0.47389915585517883, |
|
"learning_rate": 4.232804232804233e-06, |
|
"loss": 0.2244, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.022222222222222, |
|
"grad_norm": 0.22194349765777588, |
|
"learning_rate": 4.144620811287478e-06, |
|
"loss": 0.1538, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 6.03015873015873, |
|
"grad_norm": 50.78633499145508, |
|
"learning_rate": 4.0564373897707236e-06, |
|
"loss": 0.1374, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.038095238095238, |
|
"grad_norm": 0.39046162366867065, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.3255, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.046031746031746, |
|
"grad_norm": 56.010379791259766, |
|
"learning_rate": 3.8800705467372134e-06, |
|
"loss": 0.1618, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 6.053968253968254, |
|
"grad_norm": 0.16263973712921143, |
|
"learning_rate": 3.791887125220459e-06, |
|
"loss": 0.116, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 6.061904761904762, |
|
"grad_norm": 0.23908407986164093, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.1012, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 6.06984126984127, |
|
"grad_norm": 53.480140686035156, |
|
"learning_rate": 3.615520282186949e-06, |
|
"loss": 0.1767, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.0777777777777775, |
|
"grad_norm": 0.681148886680603, |
|
"learning_rate": 3.527336860670194e-06, |
|
"loss": 0.1364, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.085714285714285, |
|
"grad_norm": 6.9544267654418945, |
|
"learning_rate": 3.4391534391534394e-06, |
|
"loss": 0.0884, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 6.093650793650793, |
|
"grad_norm": 0.3266388177871704, |
|
"learning_rate": 3.3509700176366843e-06, |
|
"loss": 0.2294, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 6.10079365079365, |
|
"eval_accuracy": 0.9475982532751092, |
|
"eval_accuracy_hold": 0.8846153846153846, |
|
"eval_accuracy_preparation": 1.0, |
|
"eval_accuracy_recovery": 0.8823529411764706, |
|
"eval_accuracy_stroke": 0.16666666666666666, |
|
"eval_accuracy_unknown": 0.9090909090909091, |
|
"eval_loss": 0.20944689214229584, |
|
"eval_runtime": 16.986, |
|
"eval_samples_per_second": 13.482, |
|
"eval_steps_per_second": 1.707, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 7.000793650793651, |
|
"grad_norm": 4.641857147216797, |
|
"learning_rate": 3.2627865961199297e-06, |
|
"loss": 0.1946, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.008730158730159, |
|
"grad_norm": 33.782859802246094, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.0758, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.016666666666667, |
|
"grad_norm": 0.22343800961971283, |
|
"learning_rate": 3.08641975308642e-06, |
|
"loss": 0.0885, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 7.0246031746031745, |
|
"grad_norm": 4.360742092132568, |
|
"learning_rate": 2.998236331569665e-06, |
|
"loss": 0.016, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 7.032539682539682, |
|
"grad_norm": 0.2488667219877243, |
|
"learning_rate": 2.9100529100529103e-06, |
|
"loss": 0.1495, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 7.04047619047619, |
|
"grad_norm": 28.20427703857422, |
|
"learning_rate": 2.8218694885361552e-06, |
|
"loss": 0.1959, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 7.048412698412698, |
|
"grad_norm": 8.172640800476074, |
|
"learning_rate": 2.7336860670194006e-06, |
|
"loss": 0.0934, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 7.056349206349206, |
|
"grad_norm": 9.601517677307129, |
|
"learning_rate": 2.6455026455026455e-06, |
|
"loss": 0.2069, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.064285714285714, |
|
"grad_norm": 17.030954360961914, |
|
"learning_rate": 2.557319223985891e-06, |
|
"loss": 0.0529, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 7.072222222222222, |
|
"grad_norm": 0.2699105143547058, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.236, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 7.08015873015873, |
|
"grad_norm": 0.7129911184310913, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.015, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 7.088095238095238, |
|
"grad_norm": 20.803991317749023, |
|
"learning_rate": 2.292768959435626e-06, |
|
"loss": 0.07, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.096031746031746, |
|
"grad_norm": 31.60879135131836, |
|
"learning_rate": 2.2045855379188715e-06, |
|
"loss": 0.1214, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 7.10079365079365, |
|
"eval_accuracy": 0.9606986899563319, |
|
"eval_accuracy_hold": 0.8846153846153846, |
|
"eval_accuracy_preparation": 0.9936708860759493, |
|
"eval_accuracy_recovery": 0.8823529411764706, |
|
"eval_accuracy_stroke": 0.6666666666666666, |
|
"eval_accuracy_unknown": 0.9545454545454546, |
|
"eval_loss": 0.15864819288253784, |
|
"eval_runtime": 198.6859, |
|
"eval_samples_per_second": 1.153, |
|
"eval_steps_per_second": 0.146, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 8.003174603174603, |
|
"grad_norm": 0.32036104798316956, |
|
"learning_rate": 2.1164021164021164e-06, |
|
"loss": 0.0981, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 8.011111111111111, |
|
"grad_norm": 0.1587836891412735, |
|
"learning_rate": 2.0282186948853618e-06, |
|
"loss": 0.1222, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 8.019047619047619, |
|
"grad_norm": 9.007894515991211, |
|
"learning_rate": 1.9400352733686067e-06, |
|
"loss": 0.2571, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 8.026984126984127, |
|
"grad_norm": 0.25755736231803894, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0791, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 8.034920634920635, |
|
"grad_norm": 6.88518762588501, |
|
"learning_rate": 1.763668430335097e-06, |
|
"loss": 0.0597, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 8.042857142857143, |
|
"grad_norm": 28.681108474731445, |
|
"learning_rate": 1.6754850088183422e-06, |
|
"loss": 0.1355, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 8.05079365079365, |
|
"grad_norm": 0.7820481061935425, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.0102, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 8.058730158730159, |
|
"grad_norm": 0.11372612416744232, |
|
"learning_rate": 1.4991181657848325e-06, |
|
"loss": 0.0969, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 8.066666666666666, |
|
"grad_norm": 0.962800145149231, |
|
"learning_rate": 1.4109347442680776e-06, |
|
"loss": 0.0633, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.074603174603174, |
|
"grad_norm": 0.29850736260414124, |
|
"learning_rate": 1.3227513227513228e-06, |
|
"loss": 0.0743, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 8.082539682539682, |
|
"grad_norm": 0.2638128101825714, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0603, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.09047619047619, |
|
"grad_norm": 4.3052215576171875, |
|
"learning_rate": 1.146384479717813e-06, |
|
"loss": 0.1776, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 8.098412698412698, |
|
"grad_norm": 7.065666675567627, |
|
"learning_rate": 1.0582010582010582e-06, |
|
"loss": 0.1478, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 8.100793650793651, |
|
"eval_accuracy": 0.9432314410480349, |
|
"eval_accuracy_hold": 0.8846153846153846, |
|
"eval_accuracy_preparation": 0.9683544303797469, |
|
"eval_accuracy_recovery": 0.8823529411764706, |
|
"eval_accuracy_stroke": 0.6666666666666666, |
|
"eval_accuracy_unknown": 0.9545454545454546, |
|
"eval_loss": 0.16072985529899597, |
|
"eval_runtime": 38.5322, |
|
"eval_samples_per_second": 5.943, |
|
"eval_steps_per_second": 0.753, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 9.005555555555556, |
|
"grad_norm": 1.259048342704773, |
|
"learning_rate": 9.700176366843034e-07, |
|
"loss": 0.0626, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 9.013492063492064, |
|
"grad_norm": 13.264341354370117, |
|
"learning_rate": 8.818342151675485e-07, |
|
"loss": 0.1718, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 9.021428571428572, |
|
"grad_norm": 0.916714608669281, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 0.0135, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 9.02936507936508, |
|
"grad_norm": 0.167047917842865, |
|
"learning_rate": 7.054673721340388e-07, |
|
"loss": 0.087, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 9.037301587301588, |
|
"grad_norm": 0.09918953478336334, |
|
"learning_rate": 6.17283950617284e-07, |
|
"loss": 0.0936, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 9.045238095238096, |
|
"grad_norm": 9.908056259155273, |
|
"learning_rate": 5.291005291005291e-07, |
|
"loss": 0.1706, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.053174603174604, |
|
"grad_norm": 0.15841275453567505, |
|
"learning_rate": 4.4091710758377425e-07, |
|
"loss": 0.0411, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 9.061111111111112, |
|
"grad_norm": 9.066829681396484, |
|
"learning_rate": 3.527336860670194e-07, |
|
"loss": 0.0798, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 9.06904761904762, |
|
"grad_norm": 15.856799125671387, |
|
"learning_rate": 2.6455026455026455e-07, |
|
"loss": 0.0698, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 9.076984126984128, |
|
"grad_norm": 57.29991149902344, |
|
"learning_rate": 1.763668430335097e-07, |
|
"loss": 0.0994, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 9.084920634920636, |
|
"grad_norm": 0.18446692824363708, |
|
"learning_rate": 8.818342151675485e-08, |
|
"loss": 0.1576, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 9.092857142857143, |
|
"grad_norm": 0.1289648562669754, |
|
"learning_rate": 0.0, |
|
"loss": 0.1156, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 9.092857142857143, |
|
"eval_accuracy": 0.9737991266375546, |
|
"eval_accuracy_hold": 1.0, |
|
"eval_accuracy_preparation": 0.9936708860759493, |
|
"eval_accuracy_recovery": 0.8823529411764706, |
|
"eval_accuracy_stroke": 0.6666666666666666, |
|
"eval_accuracy_unknown": 0.9545454545454546, |
|
"eval_loss": 0.11774494498968124, |
|
"eval_runtime": 38.5752, |
|
"eval_samples_per_second": 5.936, |
|
"eval_steps_per_second": 0.752, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 9.092857142857143, |
|
"step": 1260, |
|
"total_flos": 1.2549452224921436e+19, |
|
"train_loss": 0.42608252665589724, |
|
"train_runtime": 2594.6501, |
|
"train_samples_per_second": 3.885, |
|
"train_steps_per_second": 0.486 |
|
}, |
|
{ |
|
"epoch": 9.092857142857143, |
|
"eval_accuracy": 0.946058091286307, |
|
"eval_accuracy_hold": 1.0, |
|
"eval_accuracy_preparation": 0.9685534591194969, |
|
"eval_accuracy_recovery": 0.8947368421052632, |
|
"eval_accuracy_stroke": 0.42857142857142855, |
|
"eval_accuracy_unknown": 0.9285714285714286, |
|
"eval_loss": 0.19672314822673798, |
|
"eval_runtime": 33.9773, |
|
"eval_samples_per_second": 7.093, |
|
"eval_steps_per_second": 0.912, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 9.092857142857143, |
|
"eval_accuracy": 0.946058091286307, |
|
"eval_accuracy_hold": 1.0, |
|
"eval_accuracy_preparation": 0.9685534591194969, |
|
"eval_accuracy_recovery": 0.8947368421052632, |
|
"eval_accuracy_stroke": 0.42857142857142855, |
|
"eval_accuracy_unknown": 0.9285714285714286, |
|
"eval_loss": 0.19672317802906036, |
|
"eval_runtime": 30.5504, |
|
"eval_samples_per_second": 7.889, |
|
"eval_steps_per_second": 1.015, |
|
"step": 1260 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1260, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2549452224921436e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|