{ "best_metric": 1.0, "best_model_checkpoint": "videomae-base-finetuned-bekhoaxe/checkpoint-231", "epoch": 3.25, "eval_steps": 500, "global_step": 308, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 4.9310173988342285, "learning_rate": 1.6129032258064517e-05, "loss": 0.7032, "step": 10 }, { "epoch": 0.06, "grad_norm": 6.229051113128662, "learning_rate": 3.2258064516129034e-05, "loss": 0.5738, "step": 20 }, { "epoch": 0.1, "grad_norm": 9.958233833312988, "learning_rate": 4.8387096774193554e-05, "loss": 0.3415, "step": 30 }, { "epoch": 0.13, "grad_norm": 7.560524940490723, "learning_rate": 4.837545126353791e-05, "loss": 0.4689, "step": 40 }, { "epoch": 0.16, "grad_norm": 5.026527404785156, "learning_rate": 4.657039711191336e-05, "loss": 0.2069, "step": 50 }, { "epoch": 0.19, "grad_norm": 0.358181893825531, "learning_rate": 4.4765342960288806e-05, "loss": 0.0961, "step": 60 }, { "epoch": 0.23, "grad_norm": 0.05433480069041252, "learning_rate": 4.296028880866426e-05, "loss": 0.2172, "step": 70 }, { "epoch": 0.25, "eval_accuracy": 0.9915254237288136, "eval_loss": 0.013951542787253857, "eval_runtime": 38.5113, "eval_samples_per_second": 3.064, "eval_steps_per_second": 0.779, "step": 77 }, { "epoch": 1.01, "grad_norm": 1.3378506898880005, "learning_rate": 4.115523465703972e-05, "loss": 0.3363, "step": 80 }, { "epoch": 1.04, "grad_norm": 0.04731794446706772, "learning_rate": 3.935018050541516e-05, "loss": 0.195, "step": 90 }, { "epoch": 1.07, "grad_norm": 0.04140738397836685, "learning_rate": 3.754512635379062e-05, "loss": 0.0049, "step": 100 }, { "epoch": 1.11, "grad_norm": 0.08140890300273895, "learning_rate": 3.574007220216607e-05, "loss": 0.2396, "step": 110 }, { "epoch": 1.14, "grad_norm": 0.09178142994642258, "learning_rate": 3.3935018050541516e-05, "loss": 0.0746, "step": 120 }, { "epoch": 1.17, "grad_norm": 0.031925346702337265, "learning_rate": 3.212996389891697e-05, "loss": 0.3279, "step": 130 }, { "epoch": 1.2, "grad_norm": 0.11948370933532715, "learning_rate": 3.032490974729242e-05, "loss": 0.0021, "step": 140 }, { "epoch": 1.24, "grad_norm": 0.05676674470305443, "learning_rate": 2.851985559566787e-05, "loss": 0.1826, "step": 150 }, { "epoch": 1.25, "eval_accuracy": 0.9830508474576272, "eval_loss": 0.042561955749988556, "eval_runtime": 38.1931, "eval_samples_per_second": 3.09, "eval_steps_per_second": 0.785, "step": 154 }, { "epoch": 2.02, "grad_norm": 0.11792542040348053, "learning_rate": 2.6714801444043324e-05, "loss": 0.1442, "step": 160 }, { "epoch": 2.05, "grad_norm": 0.10978111624717712, "learning_rate": 2.4909747292418774e-05, "loss": 0.0912, "step": 170 }, { "epoch": 2.08, "grad_norm": 0.016077380627393723, "learning_rate": 2.3104693140794227e-05, "loss": 0.1236, "step": 180 }, { "epoch": 2.12, "grad_norm": 0.01301741786301136, "learning_rate": 2.1299638989169676e-05, "loss": 0.1501, "step": 190 }, { "epoch": 2.15, "grad_norm": 0.026157772168517113, "learning_rate": 1.9494584837545125e-05, "loss": 0.18, "step": 200 }, { "epoch": 2.18, "grad_norm": 0.04890529438853264, "learning_rate": 1.768953068592058e-05, "loss": 0.0038, "step": 210 }, { "epoch": 2.21, "grad_norm": 0.038881637156009674, "learning_rate": 1.588447653429603e-05, "loss": 0.0019, "step": 220 }, { "epoch": 2.25, "grad_norm": 0.01524051371961832, "learning_rate": 1.407942238267148e-05, "loss": 0.1427, "step": 230 }, { "epoch": 2.25, "eval_accuracy": 1.0, "eval_loss": 0.0019937974866479635, "eval_runtime": 38.3352, "eval_samples_per_second": 3.078, "eval_steps_per_second": 0.783, "step": 231 }, { "epoch": 3.03, "grad_norm": 0.01709669642150402, "learning_rate": 1.2274368231046932e-05, "loss": 0.0012, "step": 240 }, { "epoch": 3.06, "grad_norm": 0.1900138109922409, "learning_rate": 1.0469314079422383e-05, "loss": 0.1073, "step": 250 }, { "epoch": 3.09, "grad_norm": 0.06225905194878578, "learning_rate": 8.664259927797834e-06, "loss": 0.0029, "step": 260 }, { "epoch": 3.13, "grad_norm": 0.015592777170240879, "learning_rate": 6.859205776173286e-06, "loss": 0.0009, "step": 270 }, { "epoch": 3.16, "grad_norm": 0.016208168119192123, "learning_rate": 5.054151624548736e-06, "loss": 0.0036, "step": 280 }, { "epoch": 3.19, "grad_norm": 0.007616001646965742, "learning_rate": 3.2490974729241876e-06, "loss": 0.0008, "step": 290 }, { "epoch": 3.22, "grad_norm": 0.008425813168287277, "learning_rate": 1.4440433212996392e-06, "loss": 0.2853, "step": 300 }, { "epoch": 3.25, "eval_accuracy": 1.0, "eval_loss": 0.0012017178814858198, "eval_runtime": 38.094, "eval_samples_per_second": 3.098, "eval_steps_per_second": 0.788, "step": 308 }, { "epoch": 3.25, "step": 308, "total_flos": 1.5351515742291886e+18, "train_loss": 0.16999648642641577, "train_runtime": 584.5872, "train_samples_per_second": 2.107, "train_steps_per_second": 0.527 }, { "epoch": 3.25, "eval_accuracy": 0.9761904761904762, "eval_loss": 0.09526590257883072, "eval_runtime": 122.5253, "eval_samples_per_second": 1.714, "eval_steps_per_second": 0.433, "step": 308 } ], "logging_steps": 10, "max_steps": 308, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 1.5351515742291886e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }