{
  "best_metric": 0.9964912280701754,
  "best_model_checkpoint": "videomae-base-finetuned-bekhoaxe/checkpoint-636",
  "epoch": 3.25,
  "eval_steps": 500,
  "global_step": 848,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 10.807732582092285,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.6707,
      "step": 10
    },
    {
      "epoch": 0.02,
      "grad_norm": 6.269227504730225,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.6435,
      "step": 20
    },
    {
      "epoch": 0.04,
      "grad_norm": 10.753657341003418,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.571,
      "step": 30
    },
    {
      "epoch": 0.05,
      "grad_norm": 9.196399688720703,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 0.4578,
      "step": 40
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.8521430492401123,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.2468,
      "step": 50
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.0225911140441895,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.1737,
      "step": 60
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2649253010749817,
      "learning_rate": 4.11764705882353e-05,
      "loss": 0.0356,
      "step": 70
    },
    {
      "epoch": 0.09,
      "grad_norm": 106.07608795166016,
      "learning_rate": 4.705882352941177e-05,
      "loss": 0.3572,
      "step": 80
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.02139919251203537,
      "learning_rate": 4.9672346002621236e-05,
      "loss": 0.2095,
      "step": 90
    },
    {
      "epoch": 0.12,
      "grad_norm": 87.85816955566406,
      "learning_rate": 4.90170380078637e-05,
      "loss": 0.389,
      "step": 100
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.4829784035682678,
      "learning_rate": 4.836173001310616e-05,
      "loss": 0.1773,
      "step": 110
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.020957980304956436,
      "learning_rate": 4.7706422018348626e-05,
      "loss": 0.0059,
      "step": 120
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.018635602667927742,
      "learning_rate": 4.705111402359109e-05,
      "loss": 1.1257,
      "step": 130
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2680661976337433,
      "learning_rate": 4.6395806028833554e-05,
      "loss": 0.4074,
      "step": 140
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.034663423895835876,
      "learning_rate": 4.5740498034076015e-05,
      "loss": 0.0125,
      "step": 150
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.027404017746448517,
      "learning_rate": 4.508519003931848e-05,
      "loss": 0.1217,
      "step": 160
    },
    {
      "epoch": 0.2,
      "grad_norm": 25.80691146850586,
      "learning_rate": 4.4429882044560943e-05,
      "loss": 0.3167,
      "step": 170
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.13836145401000977,
      "learning_rate": 4.3774574049803404e-05,
      "loss": 0.5605,
      "step": 180
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.010564002208411694,
      "learning_rate": 4.311926605504588e-05,
      "loss": 0.002,
      "step": 190
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.02207314595580101,
      "learning_rate": 4.246395806028834e-05,
      "loss": 0.1598,
      "step": 200
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.039745017886161804,
      "learning_rate": 4.18086500655308e-05,
      "loss": 0.337,
      "step": 210
    },
    {
      "epoch": 0.25,
      "eval_accuracy": 0.9508771929824561,
      "eval_loss": 0.13453243672847748,
      "eval_runtime": 161.5975,
      "eval_samples_per_second": 1.764,
      "eval_steps_per_second": 0.885,
      "step": 212
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.155490756034851,
      "learning_rate": 4.115334207077327e-05,
      "loss": 0.3642,
      "step": 220
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.03908234462141991,
      "learning_rate": 4.049803407601573e-05,
      "loss": 0.0017,
      "step": 230
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.07498504221439362,
      "learning_rate": 3.984272608125819e-05,
      "loss": 0.0012,
      "step": 240
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.011099644936621189,
      "learning_rate": 3.918741808650066e-05,
      "loss": 0.6515,
      "step": 250
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.13020159304141998,
      "learning_rate": 3.8532110091743125e-05,
      "loss": 0.0009,
      "step": 260
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.03461850434541702,
      "learning_rate": 3.7876802096985586e-05,
      "loss": 0.3921,
      "step": 270
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.013056197203695774,
      "learning_rate": 3.722149410222805e-05,
      "loss": 0.2843,
      "step": 280
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.00816601887345314,
      "learning_rate": 3.6566186107470514e-05,
      "loss": 0.2704,
      "step": 290
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.4981946647167206,
      "learning_rate": 3.5910878112712975e-05,
      "loss": 0.0053,
      "step": 300
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.014170478098094463,
      "learning_rate": 3.5255570117955436e-05,
      "loss": 0.2984,
      "step": 310
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.03322271630167961,
      "learning_rate": 3.460026212319791e-05,
      "loss": 0.0012,
      "step": 320
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.019952110946178436,
      "learning_rate": 3.394495412844037e-05,
      "loss": 0.0167,
      "step": 330
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8213207721710205,
      "learning_rate": 3.328964613368283e-05,
      "loss": 0.4437,
      "step": 340
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.026097455993294716,
      "learning_rate": 3.26343381389253e-05,
      "loss": 0.0015,
      "step": 350
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.32980480790138245,
      "learning_rate": 3.197903014416776e-05,
      "loss": 0.2044,
      "step": 360
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.3815198540687561,
      "learning_rate": 3.132372214941022e-05,
      "loss": 0.1395,
      "step": 370
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.014452760107815266,
      "learning_rate": 3.066841415465269e-05,
      "loss": 0.0014,
      "step": 380
    },
    {
      "epoch": 1.21,
      "grad_norm": 7.192620754241943,
      "learning_rate": 3.0013106159895154e-05,
      "loss": 0.0059,
      "step": 390
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.009683230891823769,
      "learning_rate": 2.9357798165137618e-05,
      "loss": 0.0054,
      "step": 400
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.0035556950606405735,
      "learning_rate": 2.8702490170380082e-05,
      "loss": 0.0122,
      "step": 410
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.010539459995925426,
      "learning_rate": 2.8047182175622543e-05,
      "loss": 0.5753,
      "step": 420
    },
    {
      "epoch": 1.25,
      "eval_accuracy": 0.9789473684210527,
      "eval_loss": 0.05574525147676468,
      "eval_runtime": 89.1683,
      "eval_samples_per_second": 3.196,
      "eval_steps_per_second": 1.604,
      "step": 424
    },
    {
      "epoch": 2.01,
      "grad_norm": 0.017384065315127373,
      "learning_rate": 2.7391874180865007e-05,
      "loss": 0.0032,
      "step": 430
    },
    {
      "epoch": 2.02,
      "grad_norm": 0.041157085448503494,
      "learning_rate": 2.673656618610747e-05,
      "loss": 0.0388,
      "step": 440
    },
    {
      "epoch": 2.03,
      "grad_norm": 0.014596754685044289,
      "learning_rate": 2.6081258191349932e-05,
      "loss": 0.002,
      "step": 450
    },
    {
      "epoch": 2.04,
      "grad_norm": 0.004826271440833807,
      "learning_rate": 2.5425950196592403e-05,
      "loss": 0.1837,
      "step": 460
    },
    {
      "epoch": 2.05,
      "grad_norm": 0.01182369515299797,
      "learning_rate": 2.4770642201834864e-05,
      "loss": 0.2806,
      "step": 470
    },
    {
      "epoch": 2.07,
      "grad_norm": 0.012596765533089638,
      "learning_rate": 2.411533420707733e-05,
      "loss": 0.001,
      "step": 480
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.014254101552069187,
      "learning_rate": 2.3460026212319793e-05,
      "loss": 0.0149,
      "step": 490
    },
    {
      "epoch": 2.09,
      "grad_norm": 0.0014000836526975036,
      "learning_rate": 2.2804718217562254e-05,
      "loss": 0.3089,
      "step": 500
    },
    {
      "epoch": 2.1,
      "grad_norm": 0.0013152866158634424,
      "learning_rate": 2.214941022280472e-05,
      "loss": 0.0019,
      "step": 510
    },
    {
      "epoch": 2.11,
      "grad_norm": 0.0013162486720830202,
      "learning_rate": 2.1494102228047182e-05,
      "loss": 0.0012,
      "step": 520
    },
    {
      "epoch": 2.12,
      "grad_norm": 0.01350224670022726,
      "learning_rate": 2.0838794233289646e-05,
      "loss": 0.0004,
      "step": 530
    },
    {
      "epoch": 2.14,
      "grad_norm": 0.0011886212741956115,
      "learning_rate": 2.018348623853211e-05,
      "loss": 0.0002,
      "step": 540
    },
    {
      "epoch": 2.15,
      "grad_norm": 0.013104724697768688,
      "learning_rate": 1.9528178243774575e-05,
      "loss": 0.0005,
      "step": 550
    },
    {
      "epoch": 2.16,
      "grad_norm": 141.24769592285156,
      "learning_rate": 1.887287024901704e-05,
      "loss": 0.0858,
      "step": 560
    },
    {
      "epoch": 2.17,
      "grad_norm": 89.11790466308594,
      "learning_rate": 1.82175622542595e-05,
      "loss": 0.6766,
      "step": 570
    },
    {
      "epoch": 2.18,
      "grad_norm": 0.03498009964823723,
      "learning_rate": 1.7562254259501968e-05,
      "loss": 0.0006,
      "step": 580
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.03515917435288429,
      "learning_rate": 1.6906946264744432e-05,
      "loss": 0.0003,
      "step": 590
    },
    {
      "epoch": 2.21,
      "grad_norm": 0.003264626022428274,
      "learning_rate": 1.6251638269986893e-05,
      "loss": 0.0004,
      "step": 600
    },
    {
      "epoch": 2.22,
      "grad_norm": 0.011713879182934761,
      "learning_rate": 1.559633027522936e-05,
      "loss": 0.5946,
      "step": 610
    },
    {
      "epoch": 2.23,
      "grad_norm": 0.06233768165111542,
      "learning_rate": 1.4941022280471823e-05,
      "loss": 0.1258,
      "step": 620
    },
    {
      "epoch": 2.24,
      "grad_norm": 0.0129940714687109,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.1747,
      "step": 630
    },
    {
      "epoch": 2.25,
      "eval_accuracy": 0.9964912280701754,
      "eval_loss": 0.009211267344653606,
      "eval_runtime": 75.987,
      "eval_samples_per_second": 3.751,
      "eval_steps_per_second": 1.882,
      "step": 636
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.0608808733522892,
      "learning_rate": 1.3630406290956751e-05,
      "loss": 0.0226,
      "step": 640
    },
    {
      "epoch": 3.02,
      "grad_norm": 0.08016235381364822,
      "learning_rate": 1.2975098296199214e-05,
      "loss": 0.0644,
      "step": 650
    },
    {
      "epoch": 3.03,
      "grad_norm": 0.6168311834335327,
      "learning_rate": 1.2319790301441678e-05,
      "loss": 0.0011,
      "step": 660
    },
    {
      "epoch": 3.04,
      "grad_norm": 0.0013453299179673195,
      "learning_rate": 1.1664482306684142e-05,
      "loss": 0.1834,
      "step": 670
    },
    {
      "epoch": 3.05,
      "grad_norm": 0.0041399141773581505,
      "learning_rate": 1.1009174311926607e-05,
      "loss": 0.2115,
      "step": 680
    },
    {
      "epoch": 3.06,
      "grad_norm": 0.10097178816795349,
      "learning_rate": 1.035386631716907e-05,
      "loss": 0.2718,
      "step": 690
    },
    {
      "epoch": 3.08,
      "grad_norm": 0.03554755076766014,
      "learning_rate": 9.698558322411533e-06,
      "loss": 0.0316,
      "step": 700
    },
    {
      "epoch": 3.09,
      "grad_norm": 0.3528023958206177,
      "learning_rate": 9.043250327653998e-06,
      "loss": 0.001,
      "step": 710
    },
    {
      "epoch": 3.1,
      "grad_norm": 0.016770780086517334,
      "learning_rate": 8.387942332896462e-06,
      "loss": 0.0004,
      "step": 720
    },
    {
      "epoch": 3.11,
      "grad_norm": 0.6642473936080933,
      "learning_rate": 7.732634338138926e-06,
      "loss": 0.0012,
      "step": 730
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.0030567694921046495,
      "learning_rate": 7.07732634338139e-06,
      "loss": 0.2103,
      "step": 740
    },
    {
      "epoch": 3.13,
      "grad_norm": 0.003925285767763853,
      "learning_rate": 6.422018348623854e-06,
      "loss": 0.0006,
      "step": 750
    },
    {
      "epoch": 3.15,
      "grad_norm": 0.02660321444272995,
      "learning_rate": 5.766710353866317e-06,
      "loss": 0.0006,
      "step": 760
    },
    {
      "epoch": 3.16,
      "grad_norm": 0.1314881145954132,
      "learning_rate": 5.1114023591087816e-06,
      "loss": 0.0008,
      "step": 770
    },
    {
      "epoch": 3.17,
      "grad_norm": 0.008924220688641071,
      "learning_rate": 4.456094364351245e-06,
      "loss": 0.0021,
      "step": 780
    },
    {
      "epoch": 3.18,
      "grad_norm": 0.02253740094602108,
      "learning_rate": 3.800786369593709e-06,
      "loss": 0.0005,
      "step": 790
    },
    {
      "epoch": 3.19,
      "grad_norm": 0.006063047330826521,
      "learning_rate": 3.145478374836173e-06,
      "loss": 0.0005,
      "step": 800
    },
    {
      "epoch": 3.21,
      "grad_norm": 0.01928607001900673,
      "learning_rate": 2.490170380078637e-06,
      "loss": 0.0002,
      "step": 810
    },
    {
      "epoch": 3.22,
      "grad_norm": 0.2391887903213501,
      "learning_rate": 1.8348623853211011e-06,
      "loss": 0.0005,
      "step": 820
    },
    {
      "epoch": 3.23,
      "grad_norm": 0.012002573348581791,
      "learning_rate": 1.179554390563565e-06,
      "loss": 0.0003,
      "step": 830
    },
    {
      "epoch": 3.24,
      "grad_norm": 0.009785789065063,
      "learning_rate": 5.242463958060289e-07,
      "loss": 0.001,
      "step": 840
    },
    {
      "epoch": 3.25,
      "eval_accuracy": 0.9964912280701754,
      "eval_loss": 0.005051769781857729,
      "eval_runtime": 84.2043,
      "eval_samples_per_second": 3.385,
      "eval_steps_per_second": 1.698,
      "step": 848
    },
    {
      "epoch": 3.25,
      "step": 848,
      "total_flos": 2.1133255437440778e+18,
      "train_loss": 0.16699378463049214,
      "train_runtime": 1543.4976,
      "train_samples_per_second": 1.099,
      "train_steps_per_second": 0.549
    },
    {
      "epoch": 3.25,
      "eval_accuracy": 0.9887429643527205,
      "eval_loss": 0.020199885591864586,
      "eval_runtime": 328.4712,
      "eval_samples_per_second": 1.623,
      "eval_steps_per_second": 0.813,
      "step": 848
    }
  ],
  "logging_steps": 10,
  "max_steps": 848,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 2.1133255437440778e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}