|
{ |
|
"best_metric": 0.6310679611650486, |
|
"best_model_checkpoint": "videomae-large-finetuned-right-hand-conflab-v3/checkpoint-531", |
|
"epoch": 10.034188034188034, |
|
"eval_steps": 500, |
|
"global_step": 618, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01221001221001221, |
|
"grad_norm": 10.883779525756836, |
|
"learning_rate": 6.0975609756097564e-06, |
|
"loss": 2.1132, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02442002442002442, |
|
"grad_norm": 9.386421203613281, |
|
"learning_rate": 1.2195121951219513e-05, |
|
"loss": 2.0109, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 9.540677070617676, |
|
"learning_rate": 1.8292682926829268e-05, |
|
"loss": 1.9597, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04884004884004884, |
|
"grad_norm": 8.591822624206543, |
|
"learning_rate": 2.4390243902439026e-05, |
|
"loss": 1.9359, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06105006105006105, |
|
"grad_norm": 8.493247985839844, |
|
"learning_rate": 3.048780487804878e-05, |
|
"loss": 1.9038, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07203907203907203, |
|
"eval_accuracy": 0.24757281553398058, |
|
"eval_loss": 2.092643976211548, |
|
"eval_runtime": 20.1931, |
|
"eval_samples_per_second": 10.201, |
|
"eval_steps_per_second": 0.644, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.0012210012210012, |
|
"grad_norm": 9.048627853393555, |
|
"learning_rate": 3.6585365853658535e-05, |
|
"loss": 2.0497, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0134310134310134, |
|
"grad_norm": 5.44881010055542, |
|
"learning_rate": 4.26829268292683e-05, |
|
"loss": 2.0479, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 6.384305477142334, |
|
"learning_rate": 4.878048780487805e-05, |
|
"loss": 2.09, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.037851037851038, |
|
"grad_norm": 4.374388694763184, |
|
"learning_rate": 4.94572591587517e-05, |
|
"loss": 2.0209, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.05006105006105, |
|
"grad_norm": 4.627490043640137, |
|
"learning_rate": 4.877883310719132e-05, |
|
"loss": 2.0525, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0622710622710623, |
|
"grad_norm": 5.586881160736084, |
|
"learning_rate": 4.810040705563094e-05, |
|
"loss": 1.8963, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.072039072039072, |
|
"eval_accuracy": 0.24271844660194175, |
|
"eval_loss": 1.9697831869125366, |
|
"eval_runtime": 21.1947, |
|
"eval_samples_per_second": 9.719, |
|
"eval_steps_per_second": 0.613, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.0024420024420024, |
|
"grad_norm": 5.62977409362793, |
|
"learning_rate": 4.742198100407056e-05, |
|
"loss": 1.9983, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0146520146520146, |
|
"grad_norm": 4.484747886657715, |
|
"learning_rate": 4.674355495251018e-05, |
|
"loss": 1.9586, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0268620268620268, |
|
"grad_norm": 4.371191501617432, |
|
"learning_rate": 4.60651289009498e-05, |
|
"loss": 1.8729, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.039072039072039, |
|
"grad_norm": 4.68417501449585, |
|
"learning_rate": 4.5386702849389416e-05, |
|
"loss": 2.0021, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 12.167759895324707, |
|
"learning_rate": 4.470827679782904e-05, |
|
"loss": 1.7877, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0634920634920633, |
|
"grad_norm": 3.787353515625, |
|
"learning_rate": 4.402985074626866e-05, |
|
"loss": 1.9154, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0720390720390722, |
|
"eval_accuracy": 0.36893203883495146, |
|
"eval_loss": 1.7126853466033936, |
|
"eval_runtime": 18.7207, |
|
"eval_samples_per_second": 11.004, |
|
"eval_steps_per_second": 0.694, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 3.0036630036630036, |
|
"grad_norm": 5.718711853027344, |
|
"learning_rate": 4.335142469470828e-05, |
|
"loss": 1.7051, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.015873015873016, |
|
"grad_norm": 6.827328681945801, |
|
"learning_rate": 4.26729986431479e-05, |
|
"loss": 1.7233, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.028083028083028, |
|
"grad_norm": 6.945235252380371, |
|
"learning_rate": 4.199457259158752e-05, |
|
"loss": 1.5634, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.04029304029304, |
|
"grad_norm": 7.2410197257995605, |
|
"learning_rate": 4.131614654002714e-05, |
|
"loss": 1.5571, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0525030525030523, |
|
"grad_norm": 5.356701374053955, |
|
"learning_rate": 4.063772048846676e-05, |
|
"loss": 1.6352, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.064713064713065, |
|
"grad_norm": 6.82385778427124, |
|
"learning_rate": 3.995929443690638e-05, |
|
"loss": 1.4173, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0720390720390722, |
|
"eval_accuracy": 0.5242718446601942, |
|
"eval_loss": 1.4571937322616577, |
|
"eval_runtime": 25.9349, |
|
"eval_samples_per_second": 7.943, |
|
"eval_steps_per_second": 0.501, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 4.004884004884005, |
|
"grad_norm": 5.349239349365234, |
|
"learning_rate": 3.9280868385345995e-05, |
|
"loss": 1.5368, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.017094017094017, |
|
"grad_norm": 7.013088703155518, |
|
"learning_rate": 3.860244233378562e-05, |
|
"loss": 1.3767, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.029304029304029, |
|
"grad_norm": 8.280290603637695, |
|
"learning_rate": 3.792401628222524e-05, |
|
"loss": 1.4361, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.041514041514041, |
|
"grad_norm": 8.19866943359375, |
|
"learning_rate": 3.724559023066486e-05, |
|
"loss": 1.193, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.0537240537240535, |
|
"grad_norm": 7.372025012969971, |
|
"learning_rate": 3.656716417910448e-05, |
|
"loss": 1.3667, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.065934065934066, |
|
"grad_norm": 5.639531135559082, |
|
"learning_rate": 3.58887381275441e-05, |
|
"loss": 1.3246, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.072039072039072, |
|
"eval_accuracy": 0.5679611650485437, |
|
"eval_loss": 1.223657488822937, |
|
"eval_runtime": 20.7954, |
|
"eval_samples_per_second": 9.906, |
|
"eval_steps_per_second": 0.625, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.006105006105006, |
|
"grad_norm": 7.878034591674805, |
|
"learning_rate": 3.521031207598372e-05, |
|
"loss": 1.1881, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.018315018315018, |
|
"grad_norm": 7.679462909698486, |
|
"learning_rate": 3.453188602442334e-05, |
|
"loss": 1.1117, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.03052503052503, |
|
"grad_norm": 10.07403564453125, |
|
"learning_rate": 3.385345997286296e-05, |
|
"loss": 1.0766, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.042735042735043, |
|
"grad_norm": 7.193727970123291, |
|
"learning_rate": 3.3175033921302575e-05, |
|
"loss": 1.006, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.054945054945055, |
|
"grad_norm": 5.925375461578369, |
|
"learning_rate": 3.24966078697422e-05, |
|
"loss": 0.9709, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.067155067155067, |
|
"grad_norm": 13.94832706451416, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 1.077, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.072039072039072, |
|
"eval_accuracy": 0.49514563106796117, |
|
"eval_loss": 1.4524530172348022, |
|
"eval_runtime": 22.2248, |
|
"eval_samples_per_second": 9.269, |
|
"eval_steps_per_second": 0.585, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 6.007326007326007, |
|
"grad_norm": 8.016749382019043, |
|
"learning_rate": 3.113975576662144e-05, |
|
"loss": 0.9269, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.0195360195360195, |
|
"grad_norm": 5.849711894989014, |
|
"learning_rate": 3.046132971506106e-05, |
|
"loss": 0.8371, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.031746031746032, |
|
"grad_norm": 7.203096389770508, |
|
"learning_rate": 2.9782903663500678e-05, |
|
"loss": 0.7639, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.043956043956044, |
|
"grad_norm": 7.49542236328125, |
|
"learning_rate": 2.91044776119403e-05, |
|
"loss": 0.8475, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.056166056166056, |
|
"grad_norm": 9.251474380493164, |
|
"learning_rate": 2.842605156037992e-05, |
|
"loss": 0.8942, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.068376068376068, |
|
"grad_norm": 8.035192489624023, |
|
"learning_rate": 2.7747625508819542e-05, |
|
"loss": 0.923, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.072039072039072, |
|
"eval_accuracy": 0.587378640776699, |
|
"eval_loss": 1.3191975355148315, |
|
"eval_runtime": 21.2599, |
|
"eval_samples_per_second": 9.69, |
|
"eval_steps_per_second": 0.611, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 7.0085470085470085, |
|
"grad_norm": 9.951005935668945, |
|
"learning_rate": 2.7069199457259158e-05, |
|
"loss": 0.6643, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.020757020757021, |
|
"grad_norm": 10.067875862121582, |
|
"learning_rate": 2.639077340569878e-05, |
|
"loss": 0.6866, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.032967032967033, |
|
"grad_norm": 11.081637382507324, |
|
"learning_rate": 2.57123473541384e-05, |
|
"loss": 0.5887, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.045177045177045, |
|
"grad_norm": 7.662527084350586, |
|
"learning_rate": 2.5033921302578023e-05, |
|
"loss": 0.6175, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.057387057387057, |
|
"grad_norm": 6.395303726196289, |
|
"learning_rate": 2.4355495251017642e-05, |
|
"loss": 0.6575, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.069597069597069, |
|
"grad_norm": 12.816076278686523, |
|
"learning_rate": 2.367706919945726e-05, |
|
"loss": 0.651, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.072039072039072, |
|
"eval_accuracy": 0.5728155339805825, |
|
"eval_loss": 1.3817497491836548, |
|
"eval_runtime": 22.418, |
|
"eval_samples_per_second": 9.189, |
|
"eval_steps_per_second": 0.58, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 8.00976800976801, |
|
"grad_norm": 6.377563953399658, |
|
"learning_rate": 2.299864314789688e-05, |
|
"loss": 0.4579, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.021978021978022, |
|
"grad_norm": 9.152300834655762, |
|
"learning_rate": 2.2320217096336503e-05, |
|
"loss": 0.4142, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.034188034188034, |
|
"grad_norm": 6.605992317199707, |
|
"learning_rate": 2.164179104477612e-05, |
|
"loss": 0.4407, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.046398046398046, |
|
"grad_norm": 14.947940826416016, |
|
"learning_rate": 2.0963364993215738e-05, |
|
"loss": 0.3638, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.058608058608058, |
|
"grad_norm": 9.7440824508667, |
|
"learning_rate": 2.028493894165536e-05, |
|
"loss": 0.5602, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.07081807081807, |
|
"grad_norm": 8.176590919494629, |
|
"learning_rate": 1.960651289009498e-05, |
|
"loss": 0.5092, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.072039072039072, |
|
"eval_accuracy": 0.6310679611650486, |
|
"eval_loss": 1.3680421113967896, |
|
"eval_runtime": 18.341, |
|
"eval_samples_per_second": 11.232, |
|
"eval_steps_per_second": 0.709, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 9.010989010989011, |
|
"grad_norm": 6.507408142089844, |
|
"learning_rate": 1.89280868385346e-05, |
|
"loss": 0.3515, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.023199023199023, |
|
"grad_norm": 10.037909507751465, |
|
"learning_rate": 1.824966078697422e-05, |
|
"loss": 0.38, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.035409035409035, |
|
"grad_norm": 8.945121765136719, |
|
"learning_rate": 1.757123473541384e-05, |
|
"loss": 0.2686, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.047619047619047, |
|
"grad_norm": 7.725048542022705, |
|
"learning_rate": 1.689280868385346e-05, |
|
"loss": 0.318, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.05982905982906, |
|
"grad_norm": 8.256610870361328, |
|
"learning_rate": 1.6214382632293083e-05, |
|
"loss": 0.3725, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.072039072039072, |
|
"grad_norm": 11.454293251037598, |
|
"learning_rate": 1.55359565807327e-05, |
|
"loss": 0.2484, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.072039072039072, |
|
"eval_accuracy": 0.6262135922330098, |
|
"eval_loss": 1.4326366186141968, |
|
"eval_runtime": 19.1344, |
|
"eval_samples_per_second": 10.766, |
|
"eval_steps_per_second": 0.679, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.012210012210012, |
|
"grad_norm": 16.326658248901367, |
|
"learning_rate": 1.485753052917232e-05, |
|
"loss": 0.2977, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.024420024420024, |
|
"grad_norm": 6.485968112945557, |
|
"learning_rate": 1.417910447761194e-05, |
|
"loss": 0.2218, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.034188034188034, |
|
"eval_accuracy": 0.5804878048780487, |
|
"eval_loss": 1.7306667566299438, |
|
"eval_runtime": 28.969, |
|
"eval_samples_per_second": 7.077, |
|
"eval_steps_per_second": 0.449, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 10.034188034188034, |
|
"eval_accuracy": 0.5804878048780487, |
|
"eval_loss": 1.7345412969589233, |
|
"eval_runtime": 19.4619, |
|
"eval_samples_per_second": 10.533, |
|
"eval_steps_per_second": 0.668, |
|
"step": 618 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 819, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.276946192165241e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|