{ "best_metric": 0.34081346423562414, "best_model_checkpoint": "videomae-base-finetuned-elderf1/checkpoint-73", "epoch": 9.0875, "eval_steps": 500, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 4.3531270027160645, "learning_rate": 0.0001388888888888889, "loss": 1.7775, "step": 10 }, { "epoch": 0.03, "grad_norm": 5.537198543548584, "learning_rate": 0.0002777777777777778, "loss": 1.6877, "step": 20 }, { "epoch": 0.04, "grad_norm": 5.151556968688965, "learning_rate": 0.0004166666666666667, "loss": 1.7997, "step": 30 }, { "epoch": 0.06, "grad_norm": 5.214244365692139, "learning_rate": 0.0005555555555555556, "loss": 1.8051, "step": 40 }, { "epoch": 0.07, "grad_norm": 1.6136474609375, "learning_rate": 0.0006944444444444445, "loss": 1.7065, "step": 50 }, { "epoch": 0.08, "grad_norm": 5.423031330108643, "learning_rate": 0.0008333333333333334, "loss": 1.7922, "step": 60 }, { "epoch": 0.1, "grad_norm": 3.5037877559661865, "learning_rate": 0.0009722222222222222, "loss": 1.7358, "step": 70 }, { "epoch": 0.1, "eval_accuracy": 0.34081346423562414, "eval_loss": 1.692335844039917, "eval_runtime": 379.7283, "eval_samples_per_second": 3.755, "eval_steps_per_second": 0.237, "step": 73 }, { "epoch": 1.01, "grad_norm": 3.8850014209747314, "learning_rate": 0.0009876543209876543, "loss": 1.7738, "step": 80 }, { "epoch": 1.02, "grad_norm": 6.418526649475098, "learning_rate": 0.0009722222222222222, "loss": 1.8192, "step": 90 }, { "epoch": 1.04, "grad_norm": 6.298271179199219, "learning_rate": 0.0009567901234567902, "loss": 1.7833, "step": 100 }, { "epoch": 1.05, "grad_norm": 4.652337551116943, "learning_rate": 0.000941358024691358, "loss": 1.7534, "step": 110 }, { "epoch": 1.07, "grad_norm": 3.369626522064209, "learning_rate": 0.000925925925925926, "loss": 1.7376, "step": 120 }, { "epoch": 1.08, "grad_norm": 2.182107448577881, "learning_rate": 0.0009104938271604939, "loss": 1.6406, "step": 130 }, { "epoch": 1.09, "grad_norm": 3.763148069381714, "learning_rate": 0.0008950617283950618, "loss": 1.7163, "step": 140 }, { "epoch": 1.1, "eval_accuracy": 0.3373071528751753, "eval_loss": 1.66623854637146, "eval_runtime": 421.0248, "eval_samples_per_second": 3.387, "eval_steps_per_second": 0.214, "step": 146 }, { "epoch": 2.01, "grad_norm": 4.109099864959717, "learning_rate": 0.0008796296296296296, "loss": 1.6734, "step": 150 }, { "epoch": 2.02, "grad_norm": 3.7187507152557373, "learning_rate": 0.0008641975308641975, "loss": 1.7535, "step": 160 }, { "epoch": 2.03, "grad_norm": 2.9596164226531982, "learning_rate": 0.0008487654320987654, "loss": 1.7282, "step": 170 }, { "epoch": 2.05, "grad_norm": 5.126859188079834, "learning_rate": 0.0008333333333333334, "loss": 1.7021, "step": 180 }, { "epoch": 2.06, "grad_norm": 2.36877703666687, "learning_rate": 0.0008179012345679012, "loss": 1.6977, "step": 190 }, { "epoch": 2.08, "grad_norm": 4.42868185043335, "learning_rate": 0.0008024691358024692, "loss": 1.6882, "step": 200 }, { "epoch": 2.09, "grad_norm": 3.4869942665100098, "learning_rate": 0.0007870370370370372, "loss": 1.7018, "step": 210 }, { "epoch": 2.1, "eval_accuracy": 0.34081346423562414, "eval_loss": 1.6378456354141235, "eval_runtime": 423.5487, "eval_samples_per_second": 3.367, "eval_steps_per_second": 0.212, "step": 219 }, { "epoch": 3.0, "grad_norm": 3.7394754886627197, "learning_rate": 0.0007716049382716049, "loss": 1.7356, "step": 220 }, { "epoch": 3.02, "grad_norm": 1.3488916158676147, "learning_rate": 0.0007561728395061729, "loss": 1.7024, "step": 230 }, { "epoch": 3.03, "grad_norm": 3.196521282196045, "learning_rate": 0.0007407407407407407, "loss": 1.7094, "step": 240 }, { "epoch": 3.04, "grad_norm": 3.9984352588653564, "learning_rate": 0.0007253086419753087, "loss": 1.6481, "step": 250 }, { "epoch": 3.06, "grad_norm": 3.6886866092681885, "learning_rate": 0.0007098765432098766, "loss": 1.652, "step": 260 }, { "epoch": 3.07, "grad_norm": 2.245149850845337, "learning_rate": 0.0006944444444444445, "loss": 1.7479, "step": 270 }, { "epoch": 3.08, "grad_norm": 4.62326192855835, "learning_rate": 0.0006790123456790124, "loss": 1.7129, "step": 280 }, { "epoch": 3.1, "grad_norm": 4.474867343902588, "learning_rate": 0.0006635802469135802, "loss": 1.7334, "step": 290 }, { "epoch": 3.1, "eval_accuracy": 0.34011220196353437, "eval_loss": 1.6562532186508179, "eval_runtime": 383.4418, "eval_samples_per_second": 3.719, "eval_steps_per_second": 0.235, "step": 292 }, { "epoch": 4.01, "grad_norm": 1.4795947074890137, "learning_rate": 0.0006481481481481481, "loss": 1.7427, "step": 300 }, { "epoch": 4.03, "grad_norm": 2.173116683959961, "learning_rate": 0.0006327160493827161, "loss": 1.6894, "step": 310 }, { "epoch": 4.04, "grad_norm": 2.731816291809082, "learning_rate": 0.0006172839506172839, "loss": 1.6818, "step": 320 }, { "epoch": 4.05, "grad_norm": 2.120103120803833, "learning_rate": 0.0006018518518518519, "loss": 1.6782, "step": 330 }, { "epoch": 4.07, "grad_norm": 3.7916502952575684, "learning_rate": 0.0005864197530864199, "loss": 1.7388, "step": 340 }, { "epoch": 4.08, "grad_norm": 2.852003812789917, "learning_rate": 0.0005709876543209876, "loss": 1.7422, "step": 350 }, { "epoch": 4.09, "grad_norm": 6.300606727600098, "learning_rate": 0.0005555555555555556, "loss": 1.672, "step": 360 }, { "epoch": 4.1, "eval_accuracy": 0.23983169705469845, "eval_loss": 1.6567574739456177, "eval_runtime": 310.9306, "eval_samples_per_second": 4.586, "eval_steps_per_second": 0.289, "step": 365 }, { "epoch": 5.01, "grad_norm": 5.469892501831055, "learning_rate": 0.0005401234567901234, "loss": 1.6958, "step": 370 }, { "epoch": 5.02, "grad_norm": 3.4026269912719727, "learning_rate": 0.0005246913580246914, "loss": 1.7383, "step": 380 }, { "epoch": 5.03, "grad_norm": 2.7335870265960693, "learning_rate": 0.0005092592592592593, "loss": 1.6763, "step": 390 }, { "epoch": 5.05, "grad_norm": 4.079995155334473, "learning_rate": 0.0004938271604938272, "loss": 1.693, "step": 400 }, { "epoch": 5.06, "grad_norm": 2.8691930770874023, "learning_rate": 0.0004783950617283951, "loss": 1.7293, "step": 410 }, { "epoch": 5.08, "grad_norm": 2.4010772705078125, "learning_rate": 0.000462962962962963, "loss": 1.6812, "step": 420 }, { "epoch": 5.09, "grad_norm": 4.233634948730469, "learning_rate": 0.0004475308641975309, "loss": 1.7095, "step": 430 }, { "epoch": 5.1, "eval_accuracy": 0.3387096774193548, "eval_loss": 1.6313395500183105, "eval_runtime": 306.7441, "eval_samples_per_second": 4.649, "eval_steps_per_second": 0.293, "step": 438 }, { "epoch": 6.0, "grad_norm": 2.844386100769043, "learning_rate": 0.00043209876543209873, "loss": 1.6502, "step": 440 }, { "epoch": 6.02, "grad_norm": 3.7761685848236084, "learning_rate": 0.0004166666666666667, "loss": 1.6892, "step": 450 }, { "epoch": 6.03, "grad_norm": 2.8903658390045166, "learning_rate": 0.0004012345679012346, "loss": 1.6006, "step": 460 }, { "epoch": 6.04, "grad_norm": 2.548739194869995, "learning_rate": 0.00038580246913580245, "loss": 1.7113, "step": 470 }, { "epoch": 6.06, "grad_norm": 3.1980948448181152, "learning_rate": 0.00037037037037037035, "loss": 1.7259, "step": 480 }, { "epoch": 6.07, "grad_norm": 3.119049549102783, "learning_rate": 0.0003549382716049383, "loss": 1.6634, "step": 490 }, { "epoch": 6.09, "grad_norm": 3.465067148208618, "learning_rate": 0.0003395061728395062, "loss": 1.71, "step": 500 }, { "epoch": 6.1, "grad_norm": 1.507797122001648, "learning_rate": 0.00032407407407407406, "loss": 1.7119, "step": 510 }, { "epoch": 6.1, "eval_accuracy": 0.34081346423562414, "eval_loss": 1.6309115886688232, "eval_runtime": 295.3653, "eval_samples_per_second": 4.828, "eval_steps_per_second": 0.305, "step": 511 }, { "epoch": 7.01, "grad_norm": 1.939512848854065, "learning_rate": 0.00030864197530864197, "loss": 1.6486, "step": 520 }, { "epoch": 7.03, "grad_norm": 2.4698586463928223, "learning_rate": 0.00029320987654320993, "loss": 1.6754, "step": 530 }, { "epoch": 7.04, "grad_norm": 1.9678858518600464, "learning_rate": 0.0002777777777777778, "loss": 1.6025, "step": 540 }, { "epoch": 7.05, "grad_norm": 3.6356217861175537, "learning_rate": 0.0002623456790123457, "loss": 1.6792, "step": 550 }, { "epoch": 7.07, "grad_norm": 2.781039237976074, "learning_rate": 0.0002469135802469136, "loss": 1.7165, "step": 560 }, { "epoch": 7.08, "grad_norm": 4.021714687347412, "learning_rate": 0.0002314814814814815, "loss": 1.6836, "step": 570 }, { "epoch": 7.1, "grad_norm": 4.392849922180176, "learning_rate": 0.00021604938271604937, "loss": 1.6981, "step": 580 }, { "epoch": 7.1, "eval_accuracy": 0.3288920056100982, "eval_loss": 1.6518133878707886, "eval_runtime": 319.5699, "eval_samples_per_second": 4.462, "eval_steps_per_second": 0.282, "step": 584 }, { "epoch": 8.01, "grad_norm": 2.291691303253174, "learning_rate": 0.0002006172839506173, "loss": 1.6548, "step": 590 }, { "epoch": 8.02, "grad_norm": 4.06191873550415, "learning_rate": 0.00018518518518518518, "loss": 1.5957, "step": 600 }, { "epoch": 8.04, "grad_norm": 3.735381603240967, "learning_rate": 0.0001697530864197531, "loss": 1.6757, "step": 610 }, { "epoch": 8.05, "grad_norm": 3.7696151733398438, "learning_rate": 0.00015432098765432098, "loss": 1.6349, "step": 620 }, { "epoch": 8.06, "grad_norm": 2.210860013961792, "learning_rate": 0.0001388888888888889, "loss": 1.7561, "step": 630 }, { "epoch": 8.08, "grad_norm": 2.812994956970215, "learning_rate": 0.0001234567901234568, "loss": 1.6331, "step": 640 }, { "epoch": 8.09, "grad_norm": 3.606325626373291, "learning_rate": 0.00010802469135802468, "loss": 1.7066, "step": 650 }, { "epoch": 8.1, "eval_accuracy": 0.33099579242636745, "eval_loss": 1.6313475370407104, "eval_runtime": 304.3275, "eval_samples_per_second": 4.686, "eval_steps_per_second": 0.296, "step": 657 }, { "epoch": 9.0, "grad_norm": 2.993828296661377, "learning_rate": 9.259259259259259e-05, "loss": 1.6485, "step": 660 }, { "epoch": 9.02, "grad_norm": 1.9592925310134888, "learning_rate": 7.716049382716049e-05, "loss": 1.6776, "step": 670 }, { "epoch": 9.03, "grad_norm": 3.105025291442871, "learning_rate": 6.17283950617284e-05, "loss": 1.6466, "step": 680 }, { "epoch": 9.05, "grad_norm": 3.643643856048584, "learning_rate": 4.6296296296296294e-05, "loss": 1.6544, "step": 690 }, { "epoch": 9.06, "grad_norm": 2.5237057209014893, "learning_rate": 3.08641975308642e-05, "loss": 1.6616, "step": 700 }, { "epoch": 9.07, "grad_norm": 3.834527015686035, "learning_rate": 1.54320987654321e-05, "loss": 1.6271, "step": 710 }, { "epoch": 9.09, "grad_norm": 1.777999997138977, "learning_rate": 0.0, "loss": 1.6476, "step": 720 }, { "epoch": 9.09, "eval_accuracy": 0.3288920056100982, "eval_loss": 1.6337770223617554, "eval_runtime": 340.2408, "eval_samples_per_second": 4.191, "eval_steps_per_second": 0.265, "step": 720 }, { "epoch": 9.09, "step": 720, "total_flos": 1.4231811343419113e+19, "train_loss": 1.6995894723468357, "train_runtime": 6776.3985, "train_samples_per_second": 1.7, "train_steps_per_second": 0.106 }, { "epoch": 9.09, "eval_accuracy": 0.34811715481171546, "eval_loss": 1.703281283378601, "eval_runtime": 256.932, "eval_samples_per_second": 4.651, "eval_steps_per_second": 0.292, "step": 720 }, { "epoch": 9.09, "eval_accuracy": 0.34811715481171546, "eval_loss": 1.7031110525131226, "eval_runtime": 266.484, "eval_samples_per_second": 4.484, "eval_steps_per_second": 0.281, "step": 720 } ], "logging_steps": 10, "max_steps": 720, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 1.4231811343419113e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }