{ "best_metric": 0.6436170212765957, "best_model_checkpoint": "videomae-base-finetuned-kinetics-finetuned-conflab-traj-direction-lh-v1/checkpoint-630", "epoch": 6.134615384615385, "eval_steps": 500, "global_step": 728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013736263736263736, "grad_norm": 7.975502967834473, "learning_rate": 6.849315068493151e-06, "loss": 2.1354, "step": 10 }, { "epoch": 0.027472527472527472, "grad_norm": 6.625336647033691, "learning_rate": 1.3698630136986302e-05, "loss": 2.0408, "step": 20 }, { "epoch": 0.04120879120879121, "grad_norm": 6.562021732330322, "learning_rate": 2.0547945205479453e-05, "loss": 1.9823, "step": 30 }, { "epoch": 0.054945054945054944, "grad_norm": 6.1020612716674805, "learning_rate": 2.7397260273972603e-05, "loss": 1.8703, "step": 40 }, { "epoch": 0.06868131868131869, "grad_norm": 5.716378211975098, "learning_rate": 3.424657534246575e-05, "loss": 1.938, "step": 50 }, { "epoch": 0.08241758241758242, "grad_norm": 10.489607810974121, "learning_rate": 4.1095890410958905e-05, "loss": 1.9538, "step": 60 }, { "epoch": 0.09615384615384616, "grad_norm": 7.923999786376953, "learning_rate": 4.794520547945205e-05, "loss": 1.8729, "step": 70 }, { "epoch": 0.10989010989010989, "grad_norm": 6.225552082061768, "learning_rate": 4.9465648854961834e-05, "loss": 1.8932, "step": 80 }, { "epoch": 0.12362637362637363, "grad_norm": 5.54323673248291, "learning_rate": 4.8702290076335885e-05, "loss": 1.785, "step": 90 }, { "epoch": 0.13736263736263737, "grad_norm": 8.549125671386719, "learning_rate": 4.793893129770993e-05, "loss": 1.6395, "step": 100 }, { "epoch": 0.14423076923076922, "eval_accuracy": 0.3723404255319149, "eval_loss": 1.6255276203155518, "eval_runtime": 26.9945, "eval_samples_per_second": 6.964, "eval_steps_per_second": 0.889, "step": 105 }, { "epoch": 1.0068681318681318, "grad_norm": 5.713430404663086, "learning_rate": 4.717557251908397e-05, "loss": 1.6571, "step": 110 }, { "epoch": 1.0206043956043955, "grad_norm": 10.411270141601562, "learning_rate": 4.641221374045801e-05, "loss": 1.7831, "step": 120 }, { "epoch": 1.0343406593406594, "grad_norm": 7.114817142486572, "learning_rate": 4.5648854961832065e-05, "loss": 1.4513, "step": 130 }, { "epoch": 1.0480769230769231, "grad_norm": 8.18074893951416, "learning_rate": 4.488549618320611e-05, "loss": 1.6207, "step": 140 }, { "epoch": 1.0618131868131868, "grad_norm": 8.822683334350586, "learning_rate": 4.4122137404580154e-05, "loss": 1.6768, "step": 150 }, { "epoch": 1.0755494505494505, "grad_norm": 9.030908584594727, "learning_rate": 4.33587786259542e-05, "loss": 1.4562, "step": 160 }, { "epoch": 1.0892857142857142, "grad_norm": 7.88232421875, "learning_rate": 4.2595419847328244e-05, "loss": 1.4371, "step": 170 }, { "epoch": 1.103021978021978, "grad_norm": 10.967424392700195, "learning_rate": 4.1832061068702296e-05, "loss": 1.5417, "step": 180 }, { "epoch": 1.1167582417582418, "grad_norm": 6.950323104858398, "learning_rate": 4.106870229007634e-05, "loss": 1.3942, "step": 190 }, { "epoch": 1.1304945054945055, "grad_norm": 8.856796264648438, "learning_rate": 4.0305343511450385e-05, "loss": 1.3338, "step": 200 }, { "epoch": 1.1442307692307692, "grad_norm": 8.829967498779297, "learning_rate": 3.954198473282443e-05, "loss": 1.4009, "step": 210 }, { "epoch": 1.1442307692307692, "eval_accuracy": 0.5053191489361702, "eval_loss": 1.4288405179977417, "eval_runtime": 21.3032, "eval_samples_per_second": 8.825, "eval_steps_per_second": 1.127, "step": 210 }, { "epoch": 2.0137362637362637, "grad_norm": 7.487637519836426, "learning_rate": 3.8778625954198475e-05, "loss": 1.2009, "step": 220 }, { "epoch": 2.0274725274725274, "grad_norm": 6.921274662017822, "learning_rate": 3.801526717557252e-05, "loss": 0.8766, "step": 230 }, { "epoch": 2.041208791208791, "grad_norm": 8.127293586730957, "learning_rate": 3.7251908396946565e-05, "loss": 1.0099, "step": 240 }, { "epoch": 2.0549450549450547, "grad_norm": 13.09719467163086, "learning_rate": 3.648854961832061e-05, "loss": 1.2435, "step": 250 }, { "epoch": 2.068681318681319, "grad_norm": 9.744394302368164, "learning_rate": 3.5725190839694654e-05, "loss": 0.9877, "step": 260 }, { "epoch": 2.0824175824175826, "grad_norm": 10.143108367919922, "learning_rate": 3.4961832061068706e-05, "loss": 1.1423, "step": 270 }, { "epoch": 2.0961538461538463, "grad_norm": 11.305399894714355, "learning_rate": 3.419847328244275e-05, "loss": 1.0389, "step": 280 }, { "epoch": 2.10989010989011, "grad_norm": 11.219425201416016, "learning_rate": 3.3435114503816796e-05, "loss": 0.8836, "step": 290 }, { "epoch": 2.1236263736263736, "grad_norm": 8.758235931396484, "learning_rate": 3.267175572519084e-05, "loss": 0.8425, "step": 300 }, { "epoch": 2.1373626373626373, "grad_norm": 11.10839557647705, "learning_rate": 3.190839694656489e-05, "loss": 1.0856, "step": 310 }, { "epoch": 2.144230769230769, "eval_accuracy": 0.574468085106383, "eval_loss": 1.2734872102737427, "eval_runtime": 25.3393, "eval_samples_per_second": 7.419, "eval_steps_per_second": 0.947, "step": 315 }, { "epoch": 3.006868131868132, "grad_norm": 6.112276077270508, "learning_rate": 3.114503816793894e-05, "loss": 1.0481, "step": 320 }, { "epoch": 3.0206043956043955, "grad_norm": 3.976270914077759, "learning_rate": 3.038167938931298e-05, "loss": 0.6704, "step": 330 }, { "epoch": 3.034340659340659, "grad_norm": 6.606512069702148, "learning_rate": 2.9618320610687023e-05, "loss": 0.6002, "step": 340 }, { "epoch": 3.048076923076923, "grad_norm": 8.499869346618652, "learning_rate": 2.885496183206107e-05, "loss": 0.576, "step": 350 }, { "epoch": 3.061813186813187, "grad_norm": 4.2253499031066895, "learning_rate": 2.8091603053435116e-05, "loss": 0.4514, "step": 360 }, { "epoch": 3.0755494505494507, "grad_norm": 8.19430923461914, "learning_rate": 2.732824427480916e-05, "loss": 0.6574, "step": 370 }, { "epoch": 3.0892857142857144, "grad_norm": 16.499576568603516, "learning_rate": 2.6564885496183206e-05, "loss": 0.7088, "step": 380 }, { "epoch": 3.103021978021978, "grad_norm": 15.13383960723877, "learning_rate": 2.580152671755725e-05, "loss": 0.7706, "step": 390 }, { "epoch": 3.116758241758242, "grad_norm": 10.932879447937012, "learning_rate": 2.50381679389313e-05, "loss": 0.3813, "step": 400 }, { "epoch": 3.1304945054945055, "grad_norm": 7.318915843963623, "learning_rate": 2.4274809160305344e-05, "loss": 0.4513, "step": 410 }, { "epoch": 3.144230769230769, "grad_norm": 11.104874610900879, "learning_rate": 2.351145038167939e-05, "loss": 0.6975, "step": 420 }, { "epoch": 3.144230769230769, "eval_accuracy": 0.6063829787234043, "eval_loss": 1.2628591060638428, "eval_runtime": 22.8443, "eval_samples_per_second": 8.23, "eval_steps_per_second": 1.051, "step": 420 }, { "epoch": 4.013736263736264, "grad_norm": 5.155519008636475, "learning_rate": 2.2748091603053437e-05, "loss": 0.2989, "step": 430 }, { "epoch": 4.027472527472527, "grad_norm": 2.615933656692505, "learning_rate": 2.198473282442748e-05, "loss": 0.3818, "step": 440 }, { "epoch": 4.041208791208791, "grad_norm": 9.644124031066895, "learning_rate": 2.122137404580153e-05, "loss": 0.4455, "step": 450 }, { "epoch": 4.054945054945055, "grad_norm": 5.017002105712891, "learning_rate": 2.0458015267175575e-05, "loss": 0.3644, "step": 460 }, { "epoch": 4.068681318681318, "grad_norm": 10.373658180236816, "learning_rate": 1.969465648854962e-05, "loss": 0.3132, "step": 470 }, { "epoch": 4.082417582417582, "grad_norm": 11.175583839416504, "learning_rate": 1.8931297709923664e-05, "loss": 0.3601, "step": 480 }, { "epoch": 4.096153846153846, "grad_norm": 6.8020172119140625, "learning_rate": 1.816793893129771e-05, "loss": 0.3351, "step": 490 }, { "epoch": 4.1098901098901095, "grad_norm": 6.1250810623168945, "learning_rate": 1.7404580152671757e-05, "loss": 0.3077, "step": 500 }, { "epoch": 4.123626373626374, "grad_norm": 20.65755844116211, "learning_rate": 1.6641221374045802e-05, "loss": 0.3634, "step": 510 }, { "epoch": 4.137362637362638, "grad_norm": 20.21697235107422, "learning_rate": 1.5877862595419847e-05, "loss": 0.3574, "step": 520 }, { "epoch": 4.144230769230769, "eval_accuracy": 0.6170212765957447, "eval_loss": 1.2030434608459473, "eval_runtime": 24.5243, "eval_samples_per_second": 7.666, "eval_steps_per_second": 0.979, "step": 525 }, { "epoch": 5.006868131868132, "grad_norm": 10.236026763916016, "learning_rate": 1.5114503816793894e-05, "loss": 0.2425, "step": 530 }, { "epoch": 5.020604395604396, "grad_norm": 3.8250298500061035, "learning_rate": 1.435114503816794e-05, "loss": 0.1923, "step": 540 }, { "epoch": 5.03434065934066, "grad_norm": 6.404925346374512, "learning_rate": 1.3587786259541985e-05, "loss": 0.1586, "step": 550 }, { "epoch": 5.048076923076923, "grad_norm": 4.29252815246582, "learning_rate": 1.2824427480916032e-05, "loss": 0.1486, "step": 560 }, { "epoch": 5.061813186813187, "grad_norm": 3.185380220413208, "learning_rate": 1.2061068702290076e-05, "loss": 0.1221, "step": 570 }, { "epoch": 5.075549450549451, "grad_norm": 6.5659918785095215, "learning_rate": 1.1297709923664123e-05, "loss": 0.2548, "step": 580 }, { "epoch": 5.089285714285714, "grad_norm": 4.746553897857666, "learning_rate": 1.0534351145038168e-05, "loss": 0.1904, "step": 590 }, { "epoch": 5.103021978021978, "grad_norm": 2.2111618518829346, "learning_rate": 9.770992366412214e-06, "loss": 0.2278, "step": 600 }, { "epoch": 5.116758241758242, "grad_norm": 0.6979553699493408, "learning_rate": 9.00763358778626e-06, "loss": 0.071, "step": 610 }, { "epoch": 5.1304945054945055, "grad_norm": 1.7735334634780884, "learning_rate": 8.244274809160306e-06, "loss": 0.218, "step": 620 }, { "epoch": 5.144230769230769, "grad_norm": 29.469202041625977, "learning_rate": 7.480916030534352e-06, "loss": 0.1654, "step": 630 }, { "epoch": 5.144230769230769, "eval_accuracy": 0.6436170212765957, "eval_loss": 1.2649437189102173, "eval_runtime": 33.5955, "eval_samples_per_second": 5.596, "eval_steps_per_second": 0.714, "step": 630 }, { "epoch": 6.013736263736264, "grad_norm": 0.3265649080276489, "learning_rate": 6.717557251908398e-06, "loss": 0.0701, "step": 640 }, { "epoch": 6.027472527472527, "grad_norm": 4.214682102203369, "learning_rate": 5.954198473282443e-06, "loss": 0.0474, "step": 650 }, { "epoch": 6.041208791208791, "grad_norm": 4.0008697509765625, "learning_rate": 5.190839694656488e-06, "loss": 0.1185, "step": 660 }, { "epoch": 6.054945054945055, "grad_norm": 16.712289810180664, "learning_rate": 4.427480916030535e-06, "loss": 0.0714, "step": 670 }, { "epoch": 6.068681318681318, "grad_norm": 1.800746202468872, "learning_rate": 3.6641221374045806e-06, "loss": 0.1259, "step": 680 }, { "epoch": 6.082417582417582, "grad_norm": 2.648703098297119, "learning_rate": 2.900763358778626e-06, "loss": 0.0904, "step": 690 }, { "epoch": 6.096153846153846, "grad_norm": 0.37296178936958313, "learning_rate": 2.137404580152672e-06, "loss": 0.0844, "step": 700 }, { "epoch": 6.1098901098901095, "grad_norm": 3.3622076511383057, "learning_rate": 1.3740458015267176e-06, "loss": 0.0761, "step": 710 }, { "epoch": 6.123626373626374, "grad_norm": 17.910383224487305, "learning_rate": 6.106870229007634e-07, "loss": 0.1179, "step": 720 }, { "epoch": 6.134615384615385, "eval_accuracy": 0.6329787234042553, "eval_loss": 1.2938581705093384, "eval_runtime": 21.981, "eval_samples_per_second": 8.553, "eval_steps_per_second": 1.092, "step": 728 }, { "epoch": 6.134615384615385, "step": 728, "total_flos": 7.220084549098144e+18, "train_loss": 0.8182922416663432, "train_runtime": 1426.9497, "train_samples_per_second": 4.081, "train_steps_per_second": 0.51 }, { "epoch": 6.134615384615385, "eval_accuracy": 0.6096256684491979, "eval_loss": 1.3609111309051514, "eval_runtime": 26.248, "eval_samples_per_second": 7.124, "eval_steps_per_second": 0.914, "step": 728 }, { "epoch": 6.134615384615385, "eval_accuracy": 0.6096256684491979, "eval_loss": 1.3609113693237305, "eval_runtime": 22.0997, "eval_samples_per_second": 8.462, "eval_steps_per_second": 1.086, "step": 728 } ], "logging_steps": 10, "max_steps": 728, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.220084549098144e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }