{ "best_metric": 0.85, "best_model_checkpoint": "videomae-base-finetuned-engine-subset-20230310/checkpoint-372", "epoch": 19.018333333333334, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 8.333333333333334e-06, "loss": 2.8307, "step": 10 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-05, "loss": 2.6132, "step": 20 }, { "epoch": 0.05, "learning_rate": 2.5e-05, "loss": 2.5947, "step": 30 }, { "epoch": 0.05, "eval_accuracy": 0.15, "eval_loss": 2.538285493850708, "eval_runtime": 64.1944, "eval_samples_per_second": 1.246, "eval_steps_per_second": 0.218, "step": 31 }, { "epoch": 1.01, "learning_rate": 3.3333333333333335e-05, "loss": 2.5062, "step": 40 }, { "epoch": 1.03, "learning_rate": 4.166666666666667e-05, "loss": 2.665, "step": 50 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 2.4195, "step": 60 }, { "epoch": 1.05, "eval_accuracy": 0.15, "eval_loss": 2.5108141899108887, "eval_runtime": 62.8569, "eval_samples_per_second": 1.273, "eval_steps_per_second": 0.223, "step": 62 }, { "epoch": 2.01, "learning_rate": 4.9074074074074075e-05, "loss": 2.3539, "step": 70 }, { "epoch": 2.03, "learning_rate": 4.814814814814815e-05, "loss": 2.3374, "step": 80 }, { "epoch": 2.05, "learning_rate": 4.722222222222222e-05, "loss": 2.2476, "step": 90 }, { "epoch": 2.05, "eval_accuracy": 0.225, "eval_loss": 2.0532896518707275, "eval_runtime": 62.7732, "eval_samples_per_second": 1.274, "eval_steps_per_second": 0.223, "step": 93 }, { "epoch": 3.01, "learning_rate": 4.62962962962963e-05, "loss": 2.2757, "step": 100 }, { "epoch": 3.03, "learning_rate": 4.5370370370370374e-05, "loss": 1.9768, "step": 110 }, { "epoch": 3.04, "learning_rate": 4.4444444444444447e-05, "loss": 1.9449, "step": 120 }, { "epoch": 3.05, "eval_accuracy": 0.2375, "eval_loss": 2.071887493133545, "eval_runtime": 65.8713, "eval_samples_per_second": 1.214, "eval_steps_per_second": 0.213, "step": 124 }, { "epoch": 4.01, "learning_rate": 4.351851851851852e-05, "loss": 1.7178, "step": 130 }, { "epoch": 4.03, "learning_rate": 4.259259259259259e-05, "loss": 1.6786, "step": 140 }, { "epoch": 4.04, "learning_rate": 4.166666666666667e-05, "loss": 1.5724, "step": 150 }, { "epoch": 4.05, "eval_accuracy": 0.475, "eval_loss": 1.4755998849868774, "eval_runtime": 62.826, "eval_samples_per_second": 1.273, "eval_steps_per_second": 0.223, "step": 155 }, { "epoch": 5.01, "learning_rate": 4.074074074074074e-05, "loss": 1.3465, "step": 160 }, { "epoch": 5.03, "learning_rate": 3.981481481481482e-05, "loss": 1.3641, "step": 170 }, { "epoch": 5.04, "learning_rate": 3.888888888888889e-05, "loss": 1.395, "step": 180 }, { "epoch": 5.05, "eval_accuracy": 0.5, "eval_loss": 1.2884117364883423, "eval_runtime": 63.6279, "eval_samples_per_second": 1.257, "eval_steps_per_second": 0.22, "step": 186 }, { "epoch": 6.01, "learning_rate": 3.7962962962962964e-05, "loss": 1.1859, "step": 190 }, { "epoch": 6.02, "learning_rate": 3.7037037037037037e-05, "loss": 1.2224, "step": 200 }, { "epoch": 6.04, "learning_rate": 3.611111111111111e-05, "loss": 1.0822, "step": 210 }, { "epoch": 6.05, "eval_accuracy": 0.575, "eval_loss": 1.0678651332855225, "eval_runtime": 63.3472, "eval_samples_per_second": 1.263, "eval_steps_per_second": 0.221, "step": 217 }, { "epoch": 7.0, "learning_rate": 3.518518518518519e-05, "loss": 0.9066, "step": 220 }, { "epoch": 7.02, "learning_rate": 3.425925925925926e-05, "loss": 0.7939, "step": 230 }, { "epoch": 7.04, "learning_rate": 3.3333333333333335e-05, "loss": 1.0635, "step": 240 }, { "epoch": 7.05, "eval_accuracy": 0.7, "eval_loss": 0.8040415048599243, "eval_runtime": 63.8719, "eval_samples_per_second": 1.253, "eval_steps_per_second": 0.219, "step": 248 }, { "epoch": 8.0, "learning_rate": 3.240740740740741e-05, "loss": 1.1727, "step": 250 }, { "epoch": 8.02, "learning_rate": 3.148148148148148e-05, "loss": 0.8291, "step": 260 }, { "epoch": 8.04, "learning_rate": 3.055555555555556e-05, "loss": 0.8707, "step": 270 }, { "epoch": 8.05, "eval_accuracy": 0.525, "eval_loss": 0.9334062337875366, "eval_runtime": 64.0172, "eval_samples_per_second": 1.25, "eval_steps_per_second": 0.219, "step": 279 }, { "epoch": 9.0, "learning_rate": 2.962962962962963e-05, "loss": 0.8339, "step": 280 }, { "epoch": 9.02, "learning_rate": 2.8703703703703706e-05, "loss": 0.8118, "step": 290 }, { "epoch": 9.04, "learning_rate": 2.777777777777778e-05, "loss": 0.8068, "step": 300 }, { "epoch": 9.05, "learning_rate": 2.6851851851851855e-05, "loss": 0.7042, "step": 310 }, { "epoch": 9.05, "eval_accuracy": 0.75, "eval_loss": 0.6476640701293945, "eval_runtime": 63.3755, "eval_samples_per_second": 1.262, "eval_steps_per_second": 0.221, "step": 310 }, { "epoch": 10.02, "learning_rate": 2.5925925925925925e-05, "loss": 0.6771, "step": 320 }, { "epoch": 10.03, "learning_rate": 2.5e-05, "loss": 0.7369, "step": 330 }, { "epoch": 10.05, "learning_rate": 2.4074074074074074e-05, "loss": 0.6543, "step": 340 }, { "epoch": 10.05, "eval_accuracy": 0.7375, "eval_loss": 0.6962689757347107, "eval_runtime": 63.3385, "eval_samples_per_second": 1.263, "eval_steps_per_second": 0.221, "step": 341 }, { "epoch": 11.02, "learning_rate": 2.314814814814815e-05, "loss": 0.7641, "step": 350 }, { "epoch": 11.03, "learning_rate": 2.2222222222222223e-05, "loss": 0.5571, "step": 360 }, { "epoch": 11.05, "learning_rate": 2.1296296296296296e-05, "loss": 0.6807, "step": 370 }, { "epoch": 11.05, "eval_accuracy": 0.85, "eval_loss": 0.49579018354415894, "eval_runtime": 64.8171, "eval_samples_per_second": 1.234, "eval_steps_per_second": 0.216, "step": 372 }, { "epoch": 12.01, "learning_rate": 2.037037037037037e-05, "loss": 0.5435, "step": 380 }, { "epoch": 12.03, "learning_rate": 1.9444444444444445e-05, "loss": 0.5266, "step": 390 }, { "epoch": 12.05, "learning_rate": 1.8518518518518518e-05, "loss": 0.4924, "step": 400 }, { "epoch": 12.05, "eval_accuracy": 0.775, "eval_loss": 0.6373826265335083, "eval_runtime": 67.4155, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.208, "step": 403 }, { "epoch": 13.01, "learning_rate": 1.7592592592592595e-05, "loss": 0.4775, "step": 410 }, { "epoch": 13.03, "learning_rate": 1.6666666666666667e-05, "loss": 0.5071, "step": 420 }, { "epoch": 13.04, "learning_rate": 1.574074074074074e-05, "loss": 0.4822, "step": 430 }, { "epoch": 13.05, "eval_accuracy": 0.75, "eval_loss": 0.6144760847091675, "eval_runtime": 64.8207, "eval_samples_per_second": 1.234, "eval_steps_per_second": 0.216, "step": 434 }, { "epoch": 14.01, "learning_rate": 1.4814814814814815e-05, "loss": 0.3259, "step": 440 }, { "epoch": 14.03, "learning_rate": 1.388888888888889e-05, "loss": 0.5054, "step": 450 }, { "epoch": 14.04, "learning_rate": 1.2962962962962962e-05, "loss": 0.4878, "step": 460 }, { "epoch": 14.05, "eval_accuracy": 0.7625, "eval_loss": 0.6274302005767822, "eval_runtime": 62.4581, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.224, "step": 465 }, { "epoch": 15.01, "learning_rate": 1.2037037037037037e-05, "loss": 0.3902, "step": 470 }, { "epoch": 15.03, "learning_rate": 1.1111111111111112e-05, "loss": 0.4728, "step": 480 }, { "epoch": 15.04, "learning_rate": 1.0185185185185185e-05, "loss": 0.4442, "step": 490 }, { "epoch": 15.05, "eval_accuracy": 0.85, "eval_loss": 0.42305102944374084, "eval_runtime": 62.8623, "eval_samples_per_second": 1.273, "eval_steps_per_second": 0.223, "step": 496 }, { "epoch": 16.01, "learning_rate": 9.259259259259259e-06, "loss": 0.443, "step": 500 }, { "epoch": 16.02, "learning_rate": 8.333333333333334e-06, "loss": 0.3427, "step": 510 }, { "epoch": 16.04, "learning_rate": 7.4074074074074075e-06, "loss": 0.2739, "step": 520 }, { "epoch": 16.05, "eval_accuracy": 0.85, "eval_loss": 0.4999323785305023, "eval_runtime": 63.9597, "eval_samples_per_second": 1.251, "eval_steps_per_second": 0.219, "step": 527 }, { "epoch": 17.0, "learning_rate": 6.481481481481481e-06, "loss": 0.3676, "step": 530 }, { "epoch": 17.02, "learning_rate": 5.555555555555556e-06, "loss": 0.5171, "step": 540 }, { "epoch": 17.04, "learning_rate": 4.6296296296296296e-06, "loss": 0.3514, "step": 550 }, { "epoch": 17.05, "eval_accuracy": 0.8375, "eval_loss": 0.4638718068599701, "eval_runtime": 65.0616, "eval_samples_per_second": 1.23, "eval_steps_per_second": 0.215, "step": 558 }, { "epoch": 18.0, "learning_rate": 3.7037037037037037e-06, "loss": 0.2877, "step": 560 }, { "epoch": 18.02, "learning_rate": 2.777777777777778e-06, "loss": 0.3558, "step": 570 }, { "epoch": 18.04, "learning_rate": 1.8518518518518519e-06, "loss": 0.4158, "step": 580 }, { "epoch": 18.05, "eval_accuracy": 0.85, "eval_loss": 0.42912358045578003, "eval_runtime": 64.0669, "eval_samples_per_second": 1.249, "eval_steps_per_second": 0.219, "step": 589 }, { "epoch": 19.0, "learning_rate": 9.259259259259259e-07, "loss": 0.3158, "step": 590 }, { "epoch": 19.02, "learning_rate": 0.0, "loss": 0.2689, "step": 600 }, { "epoch": 19.02, "eval_accuracy": 0.85, "eval_loss": 0.42943257093429565, "eval_runtime": 64.3343, "eval_samples_per_second": 1.244, "eval_steps_per_second": 0.218, "step": 600 }, { "epoch": 19.02, "step": 600, "total_flos": 4.415279302910214e+18, "train_loss": 1.063151851495107, "train_runtime": 5413.8579, "train_samples_per_second": 0.665, "train_steps_per_second": 0.111 }, { "epoch": 19.02, "eval_accuracy": 0.85, "eval_loss": 0.49579018354415894, "eval_runtime": 66.0758, "eval_samples_per_second": 1.211, "eval_steps_per_second": 0.212, "step": 600 }, { "epoch": 19.02, "eval_accuracy": 0.85, "eval_loss": 0.49579015374183655, "eval_runtime": 64.7531, "eval_samples_per_second": 1.235, "eval_steps_per_second": 0.216, "step": 600 } ], "max_steps": 600, "num_train_epochs": 9223372036854775807, "total_flos": 4.415279302910214e+18, "trial_name": null, "trial_params": null }