{ "best_metric": 0.6310679611650486, "best_model_checkpoint": "videomae-large-finetuned-right-hand-conflab-v3/checkpoint-531", "epoch": 10.034188034188034, "eval_steps": 500, "global_step": 618, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01221001221001221, "grad_norm": 10.883779525756836, "learning_rate": 6.0975609756097564e-06, "loss": 2.1132, "step": 10 }, { "epoch": 0.02442002442002442, "grad_norm": 9.386421203613281, "learning_rate": 1.2195121951219513e-05, "loss": 2.0109, "step": 20 }, { "epoch": 0.03663003663003663, "grad_norm": 9.540677070617676, "learning_rate": 1.8292682926829268e-05, "loss": 1.9597, "step": 30 }, { "epoch": 0.04884004884004884, "grad_norm": 8.591822624206543, "learning_rate": 2.4390243902439026e-05, "loss": 1.9359, "step": 40 }, { "epoch": 0.06105006105006105, "grad_norm": 8.493247985839844, "learning_rate": 3.048780487804878e-05, "loss": 1.9038, "step": 50 }, { "epoch": 0.07203907203907203, "eval_accuracy": 0.24757281553398058, "eval_loss": 2.092643976211548, "eval_runtime": 20.1931, "eval_samples_per_second": 10.201, "eval_steps_per_second": 0.644, "step": 59 }, { "epoch": 1.0012210012210012, "grad_norm": 9.048627853393555, "learning_rate": 3.6585365853658535e-05, "loss": 2.0497, "step": 60 }, { "epoch": 1.0134310134310134, "grad_norm": 5.44881010055542, "learning_rate": 4.26829268292683e-05, "loss": 2.0479, "step": 70 }, { "epoch": 1.0256410256410255, "grad_norm": 6.384305477142334, "learning_rate": 4.878048780487805e-05, "loss": 2.09, "step": 80 }, { "epoch": 1.037851037851038, "grad_norm": 4.374388694763184, "learning_rate": 4.94572591587517e-05, "loss": 2.0209, "step": 90 }, { "epoch": 1.05006105006105, "grad_norm": 4.627490043640137, "learning_rate": 4.877883310719132e-05, "loss": 2.0525, "step": 100 }, { "epoch": 1.0622710622710623, "grad_norm": 5.586881160736084, "learning_rate": 4.810040705563094e-05, "loss": 1.8963, "step": 110 }, { "epoch": 1.072039072039072, "eval_accuracy": 0.24271844660194175, "eval_loss": 1.9697831869125366, "eval_runtime": 21.1947, "eval_samples_per_second": 9.719, "eval_steps_per_second": 0.613, "step": 118 }, { "epoch": 2.0024420024420024, "grad_norm": 5.62977409362793, "learning_rate": 4.742198100407056e-05, "loss": 1.9983, "step": 120 }, { "epoch": 2.0146520146520146, "grad_norm": 4.484747886657715, "learning_rate": 4.674355495251018e-05, "loss": 1.9586, "step": 130 }, { "epoch": 2.0268620268620268, "grad_norm": 4.371191501617432, "learning_rate": 4.60651289009498e-05, "loss": 1.8729, "step": 140 }, { "epoch": 2.039072039072039, "grad_norm": 4.68417501449585, "learning_rate": 4.5386702849389416e-05, "loss": 2.0021, "step": 150 }, { "epoch": 2.051282051282051, "grad_norm": 12.167759895324707, "learning_rate": 4.470827679782904e-05, "loss": 1.7877, "step": 160 }, { "epoch": 2.0634920634920633, "grad_norm": 3.787353515625, "learning_rate": 4.402985074626866e-05, "loss": 1.9154, "step": 170 }, { "epoch": 2.0720390720390722, "eval_accuracy": 0.36893203883495146, "eval_loss": 1.7126853466033936, "eval_runtime": 18.7207, "eval_samples_per_second": 11.004, "eval_steps_per_second": 0.694, "step": 177 }, { "epoch": 3.0036630036630036, "grad_norm": 5.718711853027344, "learning_rate": 4.335142469470828e-05, "loss": 1.7051, "step": 180 }, { "epoch": 3.015873015873016, "grad_norm": 6.827328681945801, "learning_rate": 4.26729986431479e-05, "loss": 1.7233, "step": 190 }, { "epoch": 3.028083028083028, "grad_norm": 6.945235252380371, "learning_rate": 4.199457259158752e-05, "loss": 1.5634, "step": 200 }, { "epoch": 3.04029304029304, "grad_norm": 7.2410197257995605, "learning_rate": 4.131614654002714e-05, "loss": 1.5571, "step": 210 }, { "epoch": 3.0525030525030523, "grad_norm": 5.356701374053955, "learning_rate": 4.063772048846676e-05, "loss": 1.6352, "step": 220 }, { "epoch": 3.064713064713065, "grad_norm": 6.82385778427124, "learning_rate": 3.995929443690638e-05, "loss": 1.4173, "step": 230 }, { "epoch": 3.0720390720390722, "eval_accuracy": 0.5242718446601942, "eval_loss": 1.4571937322616577, "eval_runtime": 25.9349, "eval_samples_per_second": 7.943, "eval_steps_per_second": 0.501, "step": 236 }, { "epoch": 4.004884004884005, "grad_norm": 5.349239349365234, "learning_rate": 3.9280868385345995e-05, "loss": 1.5368, "step": 240 }, { "epoch": 4.017094017094017, "grad_norm": 7.013088703155518, "learning_rate": 3.860244233378562e-05, "loss": 1.3767, "step": 250 }, { "epoch": 4.029304029304029, "grad_norm": 8.280290603637695, "learning_rate": 3.792401628222524e-05, "loss": 1.4361, "step": 260 }, { "epoch": 4.041514041514041, "grad_norm": 8.19866943359375, "learning_rate": 3.724559023066486e-05, "loss": 1.193, "step": 270 }, { "epoch": 4.0537240537240535, "grad_norm": 7.372025012969971, "learning_rate": 3.656716417910448e-05, "loss": 1.3667, "step": 280 }, { "epoch": 4.065934065934066, "grad_norm": 5.639531135559082, "learning_rate": 3.58887381275441e-05, "loss": 1.3246, "step": 290 }, { "epoch": 4.072039072039072, "eval_accuracy": 0.5679611650485437, "eval_loss": 1.223657488822937, "eval_runtime": 20.7954, "eval_samples_per_second": 9.906, "eval_steps_per_second": 0.625, "step": 295 }, { "epoch": 5.006105006105006, "grad_norm": 7.878034591674805, "learning_rate": 3.521031207598372e-05, "loss": 1.1881, "step": 300 }, { "epoch": 5.018315018315018, "grad_norm": 7.679462909698486, "learning_rate": 3.453188602442334e-05, "loss": 1.1117, "step": 310 }, { "epoch": 5.03052503052503, "grad_norm": 10.07403564453125, "learning_rate": 3.385345997286296e-05, "loss": 1.0766, "step": 320 }, { "epoch": 5.042735042735043, "grad_norm": 7.193727970123291, "learning_rate": 3.3175033921302575e-05, "loss": 1.006, "step": 330 }, { "epoch": 5.054945054945055, "grad_norm": 5.925375461578369, "learning_rate": 3.24966078697422e-05, "loss": 0.9709, "step": 340 }, { "epoch": 5.067155067155067, "grad_norm": 13.94832706451416, "learning_rate": 3.181818181818182e-05, "loss": 1.077, "step": 350 }, { "epoch": 5.072039072039072, "eval_accuracy": 0.49514563106796117, "eval_loss": 1.4524530172348022, "eval_runtime": 22.2248, "eval_samples_per_second": 9.269, "eval_steps_per_second": 0.585, "step": 354 }, { "epoch": 6.007326007326007, "grad_norm": 8.016749382019043, "learning_rate": 3.113975576662144e-05, "loss": 0.9269, "step": 360 }, { "epoch": 6.0195360195360195, "grad_norm": 5.849711894989014, "learning_rate": 3.046132971506106e-05, "loss": 0.8371, "step": 370 }, { "epoch": 6.031746031746032, "grad_norm": 7.203096389770508, "learning_rate": 2.9782903663500678e-05, "loss": 0.7639, "step": 380 }, { "epoch": 6.043956043956044, "grad_norm": 7.49542236328125, "learning_rate": 2.91044776119403e-05, "loss": 0.8475, "step": 390 }, { "epoch": 6.056166056166056, "grad_norm": 9.251474380493164, "learning_rate": 2.842605156037992e-05, "loss": 0.8942, "step": 400 }, { "epoch": 6.068376068376068, "grad_norm": 8.035192489624023, "learning_rate": 2.7747625508819542e-05, "loss": 0.923, "step": 410 }, { "epoch": 6.072039072039072, "eval_accuracy": 0.587378640776699, "eval_loss": 1.3191975355148315, "eval_runtime": 21.2599, "eval_samples_per_second": 9.69, "eval_steps_per_second": 0.611, "step": 413 }, { "epoch": 7.0085470085470085, "grad_norm": 9.951005935668945, "learning_rate": 2.7069199457259158e-05, "loss": 0.6643, "step": 420 }, { "epoch": 7.020757020757021, "grad_norm": 10.067875862121582, "learning_rate": 2.639077340569878e-05, "loss": 0.6866, "step": 430 }, { "epoch": 7.032967032967033, "grad_norm": 11.081637382507324, "learning_rate": 2.57123473541384e-05, "loss": 0.5887, "step": 440 }, { "epoch": 7.045177045177045, "grad_norm": 7.662527084350586, "learning_rate": 2.5033921302578023e-05, "loss": 0.6175, "step": 450 }, { "epoch": 7.057387057387057, "grad_norm": 6.395303726196289, "learning_rate": 2.4355495251017642e-05, "loss": 0.6575, "step": 460 }, { "epoch": 7.069597069597069, "grad_norm": 12.816076278686523, "learning_rate": 2.367706919945726e-05, "loss": 0.651, "step": 470 }, { "epoch": 7.072039072039072, "eval_accuracy": 0.5728155339805825, "eval_loss": 1.3817497491836548, "eval_runtime": 22.418, "eval_samples_per_second": 9.189, "eval_steps_per_second": 0.58, "step": 472 }, { "epoch": 8.00976800976801, "grad_norm": 6.377563953399658, "learning_rate": 2.299864314789688e-05, "loss": 0.4579, "step": 480 }, { "epoch": 8.021978021978022, "grad_norm": 9.152300834655762, "learning_rate": 2.2320217096336503e-05, "loss": 0.4142, "step": 490 }, { "epoch": 8.034188034188034, "grad_norm": 6.605992317199707, "learning_rate": 2.164179104477612e-05, "loss": 0.4407, "step": 500 }, { "epoch": 8.046398046398046, "grad_norm": 14.947940826416016, "learning_rate": 2.0963364993215738e-05, "loss": 0.3638, "step": 510 }, { "epoch": 8.058608058608058, "grad_norm": 9.7440824508667, "learning_rate": 2.028493894165536e-05, "loss": 0.5602, "step": 520 }, { "epoch": 8.07081807081807, "grad_norm": 8.176590919494629, "learning_rate": 1.960651289009498e-05, "loss": 0.5092, "step": 530 }, { "epoch": 8.072039072039072, "eval_accuracy": 0.6310679611650486, "eval_loss": 1.3680421113967896, "eval_runtime": 18.341, "eval_samples_per_second": 11.232, "eval_steps_per_second": 0.709, "step": 531 }, { "epoch": 9.010989010989011, "grad_norm": 6.507408142089844, "learning_rate": 1.89280868385346e-05, "loss": 0.3515, "step": 540 }, { "epoch": 9.023199023199023, "grad_norm": 10.037909507751465, "learning_rate": 1.824966078697422e-05, "loss": 0.38, "step": 550 }, { "epoch": 9.035409035409035, "grad_norm": 8.945121765136719, "learning_rate": 1.757123473541384e-05, "loss": 0.2686, "step": 560 }, { "epoch": 9.047619047619047, "grad_norm": 7.725048542022705, "learning_rate": 1.689280868385346e-05, "loss": 0.318, "step": 570 }, { "epoch": 9.05982905982906, "grad_norm": 8.256610870361328, "learning_rate": 1.6214382632293083e-05, "loss": 0.3725, "step": 580 }, { "epoch": 9.072039072039072, "grad_norm": 11.454293251037598, "learning_rate": 1.55359565807327e-05, "loss": 0.2484, "step": 590 }, { "epoch": 9.072039072039072, "eval_accuracy": 0.6262135922330098, "eval_loss": 1.4326366186141968, "eval_runtime": 19.1344, "eval_samples_per_second": 10.766, "eval_steps_per_second": 0.679, "step": 590 }, { "epoch": 10.012210012210012, "grad_norm": 16.326658248901367, "learning_rate": 1.485753052917232e-05, "loss": 0.2977, "step": 600 }, { "epoch": 10.024420024420024, "grad_norm": 6.485968112945557, "learning_rate": 1.417910447761194e-05, "loss": 0.2218, "step": 610 }, { "epoch": 10.034188034188034, "eval_accuracy": 0.5804878048780487, "eval_loss": 1.7306667566299438, "eval_runtime": 28.969, "eval_samples_per_second": 7.077, "eval_steps_per_second": 0.449, "step": 618 }, { "epoch": 10.034188034188034, "eval_accuracy": 0.5804878048780487, "eval_loss": 1.7345412969589233, "eval_runtime": 19.4619, "eval_samples_per_second": 10.533, "eval_steps_per_second": 0.668, "step": 618 } ], "logging_steps": 10, "max_steps": 819, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.276946192165241e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }