{ "best_metric": 0.5666666666666667, "best_model_checkpoint": "MCG-NJU/videomae-large/checkpoint-140", "epoch": 31.013636363636362, "eval_steps": 500, "global_step": 220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.1363636363636365e-05, "loss": 2.6619, "step": 5 }, { "epoch": 0.03, "eval_accuracy": 0.0, "eval_loss": 2.7016749382019043, "eval_runtime": 5.569, "eval_samples_per_second": 5.387, "eval_steps_per_second": 0.359, "step": 7 }, { "epoch": 1.01, "learning_rate": 2.272727272727273e-05, "loss": 2.6232, "step": 10 }, { "epoch": 1.03, "eval_accuracy": 0.0, "eval_loss": 2.6628258228302, "eval_runtime": 5.363, "eval_samples_per_second": 5.594, "eval_steps_per_second": 0.373, "step": 14 }, { "epoch": 2.0, "learning_rate": 3.409090909090909e-05, "loss": 2.5419, "step": 15 }, { "epoch": 2.03, "learning_rate": 4.545454545454546e-05, "loss": 2.381, "step": 20 }, { "epoch": 2.03, "eval_accuracy": 0.16666666666666666, "eval_loss": 2.5797576904296875, "eval_runtime": 5.6099, "eval_samples_per_second": 5.348, "eval_steps_per_second": 0.357, "step": 21 }, { "epoch": 3.02, "learning_rate": 4.9242424242424245e-05, "loss": 2.2215, "step": 25 }, { "epoch": 3.03, "eval_accuracy": 0.16666666666666666, "eval_loss": 2.4757392406463623, "eval_runtime": 5.5008, "eval_samples_per_second": 5.454, "eval_steps_per_second": 0.364, "step": 28 }, { "epoch": 4.01, "learning_rate": 4.797979797979798e-05, "loss": 1.8738, "step": 30 }, { "epoch": 4.03, "learning_rate": 4.671717171717172e-05, "loss": 1.7389, "step": 35 }, { "epoch": 4.03, "eval_accuracy": 0.23333333333333334, "eval_loss": 2.363579511642456, "eval_runtime": 6.4613, "eval_samples_per_second": 4.643, "eval_steps_per_second": 0.31, "step": 35 }, { "epoch": 5.02, "learning_rate": 4.545454545454546e-05, "loss": 1.3366, "step": 40 }, { "epoch": 5.03, "eval_accuracy": 0.3, "eval_loss": 2.2424137592315674, "eval_runtime": 5.7626, "eval_samples_per_second": 5.206, "eval_steps_per_second": 0.347, "step": 42 }, { "epoch": 6.01, "learning_rate": 4.41919191919192e-05, "loss": 1.1946, "step": 45 }, { "epoch": 6.03, "eval_accuracy": 0.3, "eval_loss": 2.167524814605713, "eval_runtime": 5.8137, "eval_samples_per_second": 5.16, "eval_steps_per_second": 0.344, "step": 49 }, { "epoch": 7.0, "learning_rate": 4.292929292929293e-05, "loss": 0.8832, "step": 50 }, { "epoch": 7.03, "learning_rate": 4.166666666666667e-05, "loss": 0.6809, "step": 55 }, { "epoch": 7.03, "eval_accuracy": 0.36666666666666664, "eval_loss": 2.0548174381256104, "eval_runtime": 5.6032, "eval_samples_per_second": 5.354, "eval_steps_per_second": 0.357, "step": 56 }, { "epoch": 8.02, "learning_rate": 4.0404040404040405e-05, "loss": 0.5255, "step": 60 }, { "epoch": 8.03, "eval_accuracy": 0.4, "eval_loss": 2.0410492420196533, "eval_runtime": 5.8803, "eval_samples_per_second": 5.102, "eval_steps_per_second": 0.34, "step": 63 }, { "epoch": 9.01, "learning_rate": 3.9141414141414145e-05, "loss": 0.4759, "step": 65 }, { "epoch": 9.03, "learning_rate": 3.787878787878788e-05, "loss": 0.3285, "step": 70 }, { "epoch": 9.03, "eval_accuracy": 0.4, "eval_loss": 1.9539462327957153, "eval_runtime": 6.0204, "eval_samples_per_second": 4.983, "eval_steps_per_second": 0.332, "step": 70 }, { "epoch": 10.02, "learning_rate": 3.661616161616162e-05, "loss": 0.2849, "step": 75 }, { "epoch": 10.03, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.8536347150802612, "eval_runtime": 5.2379, "eval_samples_per_second": 5.727, "eval_steps_per_second": 0.382, "step": 77 }, { "epoch": 11.01, "learning_rate": 3.535353535353535e-05, "loss": 0.1832, "step": 80 }, { "epoch": 11.03, "eval_accuracy": 0.43333333333333335, "eval_loss": 1.8293204307556152, "eval_runtime": 5.7575, "eval_samples_per_second": 5.211, "eval_steps_per_second": 0.347, "step": 84 }, { "epoch": 12.0, "learning_rate": 3.409090909090909e-05, "loss": 0.1485, "step": 85 }, { "epoch": 12.03, "learning_rate": 3.282828282828283e-05, "loss": 0.1307, "step": 90 }, { "epoch": 12.03, "eval_accuracy": 0.4, "eval_loss": 1.8200174570083618, "eval_runtime": 5.6546, "eval_samples_per_second": 5.305, "eval_steps_per_second": 0.354, "step": 91 }, { "epoch": 13.02, "learning_rate": 3.1565656565656566e-05, "loss": 0.0901, "step": 95 }, { "epoch": 13.03, "eval_accuracy": 0.4, "eval_loss": 1.8354666233062744, "eval_runtime": 5.7638, "eval_samples_per_second": 5.205, "eval_steps_per_second": 0.347, "step": 98 }, { "epoch": 14.01, "learning_rate": 3.0303030303030306e-05, "loss": 0.0757, "step": 100 }, { "epoch": 14.03, "learning_rate": 2.904040404040404e-05, "loss": 0.0636, "step": 105 }, { "epoch": 14.03, "eval_accuracy": 0.43333333333333335, "eval_loss": 1.8200985193252563, "eval_runtime": 5.0989, "eval_samples_per_second": 5.884, "eval_steps_per_second": 0.392, "step": 105 }, { "epoch": 15.02, "learning_rate": 2.777777777777778e-05, "loss": 0.0413, "step": 110 }, { "epoch": 15.03, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.7749541997909546, "eval_runtime": 5.2291, "eval_samples_per_second": 5.737, "eval_steps_per_second": 0.382, "step": 112 }, { "epoch": 16.01, "learning_rate": 2.6515151515151516e-05, "loss": 0.0427, "step": 115 }, { "epoch": 16.03, "eval_accuracy": 0.5333333333333333, "eval_loss": 1.745997667312622, "eval_runtime": 5.2765, "eval_samples_per_second": 5.686, "eval_steps_per_second": 0.379, "step": 119 }, { "epoch": 17.0, "learning_rate": 2.5252525252525256e-05, "loss": 0.0369, "step": 120 }, { "epoch": 17.03, "learning_rate": 2.398989898989899e-05, "loss": 0.0254, "step": 125 }, { "epoch": 17.03, "eval_accuracy": 0.5333333333333333, "eval_loss": 1.7804018259048462, "eval_runtime": 5.5848, "eval_samples_per_second": 5.372, "eval_steps_per_second": 0.358, "step": 126 }, { "epoch": 18.02, "learning_rate": 2.272727272727273e-05, "loss": 0.0203, "step": 130 }, { "epoch": 18.03, "eval_accuracy": 0.43333333333333335, "eval_loss": 1.8868685960769653, "eval_runtime": 5.6579, "eval_samples_per_second": 5.302, "eval_steps_per_second": 0.353, "step": 133 }, { "epoch": 19.01, "learning_rate": 2.1464646464646466e-05, "loss": 0.0231, "step": 135 }, { "epoch": 19.03, "learning_rate": 2.0202020202020203e-05, "loss": 0.0174, "step": 140 }, { "epoch": 19.03, "eval_accuracy": 0.5666666666666667, "eval_loss": 1.7740839719772339, "eval_runtime": 5.4112, "eval_samples_per_second": 5.544, "eval_steps_per_second": 0.37, "step": 140 }, { "epoch": 20.02, "learning_rate": 1.893939393939394e-05, "loss": 0.0154, "step": 145 }, { "epoch": 20.03, "eval_accuracy": 0.5333333333333333, "eval_loss": 1.7400553226470947, "eval_runtime": 5.673, "eval_samples_per_second": 5.288, "eval_steps_per_second": 0.353, "step": 147 }, { "epoch": 21.01, "learning_rate": 1.7676767676767676e-05, "loss": 0.0136, "step": 150 }, { "epoch": 21.03, "eval_accuracy": 0.5, "eval_loss": 1.7672396898269653, "eval_runtime": 5.5489, "eval_samples_per_second": 5.406, "eval_steps_per_second": 0.36, "step": 154 }, { "epoch": 22.0, "learning_rate": 1.6414141414141416e-05, "loss": 0.0123, "step": 155 }, { "epoch": 22.03, "learning_rate": 1.5151515151515153e-05, "loss": 0.0116, "step": 160 }, { "epoch": 22.03, "eval_accuracy": 0.5333333333333333, "eval_loss": 1.7792834043502808, "eval_runtime": 5.7051, "eval_samples_per_second": 5.258, "eval_steps_per_second": 0.351, "step": 161 }, { "epoch": 23.02, "learning_rate": 1.388888888888889e-05, "loss": 0.0123, "step": 165 }, { "epoch": 23.03, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.8018161058425903, "eval_runtime": 5.5773, "eval_samples_per_second": 5.379, "eval_steps_per_second": 0.359, "step": 168 }, { "epoch": 24.01, "learning_rate": 1.2626262626262628e-05, "loss": 0.0093, "step": 170 }, { "epoch": 24.03, "learning_rate": 1.1363636363636365e-05, "loss": 0.0102, "step": 175 }, { "epoch": 24.03, "eval_accuracy": 0.5, "eval_loss": 1.8023875951766968, "eval_runtime": 5.4661, "eval_samples_per_second": 5.488, "eval_steps_per_second": 0.366, "step": 175 }, { "epoch": 25.02, "learning_rate": 1.0101010101010101e-05, "loss": 0.0103, "step": 180 }, { "epoch": 25.03, "eval_accuracy": 0.5, "eval_loss": 1.8057912588119507, "eval_runtime": 5.5758, "eval_samples_per_second": 5.38, "eval_steps_per_second": 0.359, "step": 182 }, { "epoch": 26.01, "learning_rate": 8.838383838383838e-06, "loss": 0.0089, "step": 185 }, { "epoch": 26.03, "eval_accuracy": 0.5, "eval_loss": 1.810552954673767, "eval_runtime": 5.6149, "eval_samples_per_second": 5.343, "eval_steps_per_second": 0.356, "step": 189 }, { "epoch": 27.0, "learning_rate": 7.5757575757575764e-06, "loss": 0.009, "step": 190 }, { "epoch": 27.03, "learning_rate": 6.313131313131314e-06, "loss": 0.0088, "step": 195 }, { "epoch": 27.03, "eval_accuracy": 0.5, "eval_loss": 1.8028618097305298, "eval_runtime": 5.3559, "eval_samples_per_second": 5.601, "eval_steps_per_second": 0.373, "step": 196 }, { "epoch": 28.02, "learning_rate": 5.050505050505051e-06, "loss": 0.0092, "step": 200 }, { "epoch": 28.03, "eval_accuracy": 0.5, "eval_loss": 1.7960565090179443, "eval_runtime": 5.2538, "eval_samples_per_second": 5.71, "eval_steps_per_second": 0.381, "step": 203 }, { "epoch": 29.01, "learning_rate": 3.7878787878787882e-06, "loss": 0.0082, "step": 205 }, { "epoch": 29.03, "learning_rate": 2.5252525252525253e-06, "loss": 0.0083, "step": 210 }, { "epoch": 29.03, "eval_accuracy": 0.5, "eval_loss": 1.7939893007278442, "eval_runtime": 5.0992, "eval_samples_per_second": 5.883, "eval_steps_per_second": 0.392, "step": 210 }, { "epoch": 30.02, "learning_rate": 1.2626262626262627e-06, "loss": 0.0099, "step": 215 }, { "epoch": 30.03, "eval_accuracy": 0.5, "eval_loss": 1.7922049760818481, "eval_runtime": 5.3556, "eval_samples_per_second": 5.602, "eval_steps_per_second": 0.373, "step": 217 }, { "epoch": 31.01, "learning_rate": 0.0, "loss": 0.0085, "step": 220 }, { "epoch": 31.01, "eval_accuracy": 0.5, "eval_loss": 1.7919764518737793, "eval_runtime": 5.1737, "eval_samples_per_second": 5.799, "eval_steps_per_second": 0.387, "step": 220 }, { "epoch": 31.01, "step": 220, "total_flos": 1.5320910961010737e+19, "train_loss": 0.5185655888847329, "train_runtime": 1238.2272, "train_samples_per_second": 2.843, "train_steps_per_second": 0.178 }, { "epoch": 31.01, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.504156231880188, "eval_runtime": 2.3481, "eval_samples_per_second": 5.962, "eval_steps_per_second": 0.426, "step": 220 }, { "epoch": 31.01, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.504156231880188, "eval_runtime": 2.2938, "eval_samples_per_second": 6.104, "eval_steps_per_second": 0.436, "step": 220 } ], "logging_steps": 5, "max_steps": 220, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 1.5320910961010737e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }