{ "best_metric": 0.5404761904761904, "best_model_checkpoint": "videomae-base-finetuned-ElderReact-anger-balanced-hp/checkpoint-400", "epoch": 19.010416666666668, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 4.783530235290527, "learning_rate": 0.00020833333333333335, "loss": 0.7372, "step": 10 }, { "epoch": 0.04, "grad_norm": 3.4003899097442627, "learning_rate": 0.0004166666666666667, "loss": 0.7532, "step": 20 }, { "epoch": 0.05, "eval_accuracy": 0.5238095238095238, "eval_loss": 0.7077798247337341, "eval_runtime": 171.2381, "eval_samples_per_second": 2.453, "eval_steps_per_second": 0.158, "step": 25 }, { "epoch": 1.01, "grad_norm": 4.148990631103516, "learning_rate": 0.000625, "loss": 0.8004, "step": 30 }, { "epoch": 1.03, "grad_norm": 2.1322150230407715, "learning_rate": 0.0008333333333333334, "loss": 0.7903, "step": 40 }, { "epoch": 1.05, "grad_norm": 13.590558052062988, "learning_rate": 0.0009953703703703704, "loss": 0.7571, "step": 50 }, { "epoch": 1.05, "eval_accuracy": 0.47619047619047616, "eval_loss": 0.7033982276916504, "eval_runtime": 171.2144, "eval_samples_per_second": 2.453, "eval_steps_per_second": 0.158, "step": 50 }, { "epoch": 2.02, "grad_norm": 3.682979106903076, "learning_rate": 0.0009722222222222222, "loss": 0.723, "step": 60 }, { "epoch": 2.04, "grad_norm": 1.8970884084701538, "learning_rate": 0.0009490740740740741, "loss": 0.7357, "step": 70 }, { "epoch": 2.05, "eval_accuracy": 0.44285714285714284, "eval_loss": 0.7080289125442505, "eval_runtime": 124.8954, "eval_samples_per_second": 3.363, "eval_steps_per_second": 0.216, "step": 75 }, { "epoch": 3.01, "grad_norm": 4.713415145874023, "learning_rate": 0.000925925925925926, "loss": 0.7389, "step": 80 }, { "epoch": 3.03, "grad_norm": 2.461824893951416, "learning_rate": 0.0009027777777777778, "loss": 0.6913, "step": 90 }, { "epoch": 3.05, "grad_norm": 9.549880981445312, "learning_rate": 0.0008796296296296296, "loss": 0.6976, "step": 100 }, { "epoch": 3.05, "eval_accuracy": 0.5238095238095238, "eval_loss": 0.716019332408905, "eval_runtime": 123.304, "eval_samples_per_second": 3.406, "eval_steps_per_second": 0.219, "step": 100 }, { "epoch": 4.02, "grad_norm": 2.853116273880005, "learning_rate": 0.0008564814814814815, "loss": 0.7026, "step": 110 }, { "epoch": 4.04, "grad_norm": 5.189712047576904, "learning_rate": 0.0008333333333333334, "loss": 0.7131, "step": 120 }, { "epoch": 4.05, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.6892691254615784, "eval_runtime": 124.5641, "eval_samples_per_second": 3.372, "eval_steps_per_second": 0.217, "step": 125 }, { "epoch": 5.01, "grad_norm": 9.913081169128418, "learning_rate": 0.0008101851851851853, "loss": 0.7349, "step": 130 }, { "epoch": 5.03, "grad_norm": 5.214167594909668, "learning_rate": 0.0007870370370370372, "loss": 0.7542, "step": 140 }, { "epoch": 5.05, "grad_norm": 7.558966636657715, "learning_rate": 0.0007638888888888888, "loss": 0.7275, "step": 150 }, { "epoch": 5.05, "eval_accuracy": 0.4928571428571429, "eval_loss": 0.8349756002426147, "eval_runtime": 173.5792, "eval_samples_per_second": 2.42, "eval_steps_per_second": 0.156, "step": 150 }, { "epoch": 6.02, "grad_norm": 0.9638558030128479, "learning_rate": 0.0007407407407407407, "loss": 0.8133, "step": 160 }, { "epoch": 6.04, "grad_norm": 2.0914902687072754, "learning_rate": 0.0007175925925925926, "loss": 0.7334, "step": 170 }, { "epoch": 6.05, "eval_accuracy": 0.4738095238095238, "eval_loss": 0.7126601338386536, "eval_runtime": 163.6881, "eval_samples_per_second": 2.566, "eval_steps_per_second": 0.165, "step": 175 }, { "epoch": 7.01, "grad_norm": 1.6074931621551514, "learning_rate": 0.0006944444444444445, "loss": 0.6938, "step": 180 }, { "epoch": 7.03, "grad_norm": 1.0195335149765015, "learning_rate": 0.0006712962962962962, "loss": 0.6986, "step": 190 }, { "epoch": 7.05, "grad_norm": 10.589719772338867, "learning_rate": 0.0006481481481481481, "loss": 0.7274, "step": 200 }, { "epoch": 7.05, "eval_accuracy": 0.5047619047619047, "eval_loss": 0.7088494896888733, "eval_runtime": 163.5307, "eval_samples_per_second": 2.568, "eval_steps_per_second": 0.165, "step": 200 }, { "epoch": 8.02, "grad_norm": 1.649876356124878, "learning_rate": 0.000625, "loss": 0.6904, "step": 210 }, { "epoch": 8.04, "grad_norm": 6.6575798988342285, "learning_rate": 0.0006018518518518519, "loss": 0.697, "step": 220 }, { "epoch": 8.05, "eval_accuracy": 0.5190476190476191, "eval_loss": 0.6910755038261414, "eval_runtime": 125.391, "eval_samples_per_second": 3.35, "eval_steps_per_second": 0.215, "step": 225 }, { "epoch": 9.01, "grad_norm": 3.279046058654785, "learning_rate": 0.0005787037037037038, "loss": 0.7072, "step": 230 }, { "epoch": 9.03, "grad_norm": 1.2495840787887573, "learning_rate": 0.0005555555555555556, "loss": 0.694, "step": 240 }, { "epoch": 9.05, "grad_norm": 11.012711524963379, "learning_rate": 0.0005324074074074074, "loss": 0.7605, "step": 250 }, { "epoch": 9.05, "eval_accuracy": 0.4976190476190476, "eval_loss": 0.7295739054679871, "eval_runtime": 127.4402, "eval_samples_per_second": 3.296, "eval_steps_per_second": 0.212, "step": 250 }, { "epoch": 10.02, "grad_norm": 1.9475804567337036, "learning_rate": 0.0005092592592592593, "loss": 0.7224, "step": 260 }, { "epoch": 10.04, "grad_norm": 2.378416061401367, "learning_rate": 0.0004861111111111111, "loss": 0.7105, "step": 270 }, { "epoch": 10.05, "eval_accuracy": 0.48333333333333334, "eval_loss": 0.710009753704071, "eval_runtime": 122.4996, "eval_samples_per_second": 3.429, "eval_steps_per_second": 0.22, "step": 275 }, { "epoch": 11.01, "grad_norm": 0.702026903629303, "learning_rate": 0.000462962962962963, "loss": 0.695, "step": 280 }, { "epoch": 11.03, "grad_norm": 2.676254987716675, "learning_rate": 0.0004398148148148148, "loss": 0.6865, "step": 290 }, { "epoch": 11.05, "grad_norm": 7.562650680541992, "learning_rate": 0.0004166666666666667, "loss": 0.6745, "step": 300 }, { "epoch": 11.05, "eval_accuracy": 0.45476190476190476, "eval_loss": 0.7270693778991699, "eval_runtime": 175.5484, "eval_samples_per_second": 2.393, "eval_steps_per_second": 0.154, "step": 300 }, { "epoch": 12.02, "grad_norm": 4.304387092590332, "learning_rate": 0.0003935185185185186, "loss": 0.6889, "step": 310 }, { "epoch": 12.04, "grad_norm": 0.35395267605781555, "learning_rate": 0.00037037037037037035, "loss": 0.7166, "step": 320 }, { "epoch": 12.05, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.6954912543296814, "eval_runtime": 177.5444, "eval_samples_per_second": 2.366, "eval_steps_per_second": 0.152, "step": 325 }, { "epoch": 13.01, "grad_norm": 0.8164138793945312, "learning_rate": 0.00034722222222222224, "loss": 0.7363, "step": 330 }, { "epoch": 13.03, "grad_norm": 1.0167158842086792, "learning_rate": 0.00032407407407407406, "loss": 0.6993, "step": 340 }, { "epoch": 13.05, "grad_norm": 7.524409294128418, "learning_rate": 0.00030092592592592595, "loss": 0.6849, "step": 350 }, { "epoch": 13.05, "eval_accuracy": 0.4976190476190476, "eval_loss": 0.698103666305542, "eval_runtime": 161.7269, "eval_samples_per_second": 2.597, "eval_steps_per_second": 0.167, "step": 350 }, { "epoch": 14.02, "grad_norm": 1.1957156658172607, "learning_rate": 0.0002777777777777778, "loss": 0.6888, "step": 360 }, { "epoch": 14.04, "grad_norm": 1.1630483865737915, "learning_rate": 0.00025462962962962966, "loss": 0.6978, "step": 370 }, { "epoch": 14.05, "eval_accuracy": 0.49523809523809526, "eval_loss": 0.697565495967865, "eval_runtime": 162.8303, "eval_samples_per_second": 2.579, "eval_steps_per_second": 0.166, "step": 375 }, { "epoch": 15.01, "grad_norm": 2.8283958435058594, "learning_rate": 0.0002314814814814815, "loss": 0.6931, "step": 380 }, { "epoch": 15.03, "grad_norm": 1.4520641565322876, "learning_rate": 0.00020833333333333335, "loss": 0.6966, "step": 390 }, { "epoch": 15.05, "grad_norm": 9.017864227294922, "learning_rate": 0.00018518518518518518, "loss": 0.6928, "step": 400 }, { "epoch": 15.05, "eval_accuracy": 0.5404761904761904, "eval_loss": 0.6940857768058777, "eval_runtime": 165.3836, "eval_samples_per_second": 2.54, "eval_steps_per_second": 0.163, "step": 400 }, { "epoch": 16.02, "grad_norm": 2.541412591934204, "learning_rate": 0.00016203703703703703, "loss": 0.6795, "step": 410 }, { "epoch": 16.04, "grad_norm": 0.8520733118057251, "learning_rate": 0.0001388888888888889, "loss": 0.7057, "step": 420 }, { "epoch": 16.05, "eval_accuracy": 0.5, "eval_loss": 0.7022379636764526, "eval_runtime": 164.8925, "eval_samples_per_second": 2.547, "eval_steps_per_second": 0.164, "step": 425 }, { "epoch": 17.01, "grad_norm": 2.4603936672210693, "learning_rate": 0.00011574074074074075, "loss": 0.733, "step": 430 }, { "epoch": 17.03, "grad_norm": 2.6278584003448486, "learning_rate": 9.259259259259259e-05, "loss": 0.6902, "step": 440 }, { "epoch": 17.05, "grad_norm": 8.034465789794922, "learning_rate": 6.944444444444444e-05, "loss": 0.6842, "step": 450 }, { "epoch": 17.05, "eval_accuracy": 0.4738095238095238, "eval_loss": 0.6942616701126099, "eval_runtime": 172.9688, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.156, "step": 450 }, { "epoch": 18.02, "grad_norm": 1.0102304220199585, "learning_rate": 4.6296296296296294e-05, "loss": 0.6921, "step": 460 }, { "epoch": 18.04, "grad_norm": 1.547524094581604, "learning_rate": 2.3148148148148147e-05, "loss": 0.6824, "step": 470 }, { "epoch": 18.05, "eval_accuracy": 0.5166666666666667, "eval_loss": 0.694518506526947, "eval_runtime": 164.6584, "eval_samples_per_second": 2.551, "eval_steps_per_second": 0.164, "step": 475 }, { "epoch": 19.01, "grad_norm": 0.8265247941017151, "learning_rate": 0.0, "loss": 0.7065, "step": 480 }, { "epoch": 19.01, "eval_accuracy": 0.5142857142857142, "eval_loss": 0.6948422193527222, "eval_runtime": 157.477, "eval_samples_per_second": 2.667, "eval_steps_per_second": 0.171, "step": 480 }, { "epoch": 19.01, "step": 480, "total_flos": 9.214647639143547e+18, "train_loss": 0.7152137100696564, "train_runtime": 6018.7779, "train_samples_per_second": 1.276, "train_steps_per_second": 0.08 }, { "epoch": 19.01, "eval_accuracy": 0.4671916010498688, "eval_loss": 0.6937770247459412, "eval_runtime": 144.3495, "eval_samples_per_second": 2.639, "eval_steps_per_second": 0.166, "step": 480 }, { "epoch": 19.01, "eval_accuracy": 0.4671916010498688, "eval_loss": 0.6937723755836487, "eval_runtime": 152.0578, "eval_samples_per_second": 2.506, "eval_steps_per_second": 0.158, "step": 480 } ], "logging_steps": 10, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 9.214647639143547e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }