videomae-large / trainer_state.json
Tianjiao-Yu's picture
End of training
58ac5f7 verified
raw
history blame contribute delete
No virus
14.1 kB
{
"best_metric": 0.5666666666666667,
"best_model_checkpoint": "MCG-NJU/videomae-large/checkpoint-140",
"epoch": 31.013636363636362,
"eval_steps": 500,
"global_step": 220,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 1.1363636363636365e-05,
"loss": 2.6619,
"step": 5
},
{
"epoch": 0.03,
"eval_accuracy": 0.0,
"eval_loss": 2.7016749382019043,
"eval_runtime": 5.569,
"eval_samples_per_second": 5.387,
"eval_steps_per_second": 0.359,
"step": 7
},
{
"epoch": 1.01,
"learning_rate": 2.272727272727273e-05,
"loss": 2.6232,
"step": 10
},
{
"epoch": 1.03,
"eval_accuracy": 0.0,
"eval_loss": 2.6628258228302,
"eval_runtime": 5.363,
"eval_samples_per_second": 5.594,
"eval_steps_per_second": 0.373,
"step": 14
},
{
"epoch": 2.0,
"learning_rate": 3.409090909090909e-05,
"loss": 2.5419,
"step": 15
},
{
"epoch": 2.03,
"learning_rate": 4.545454545454546e-05,
"loss": 2.381,
"step": 20
},
{
"epoch": 2.03,
"eval_accuracy": 0.16666666666666666,
"eval_loss": 2.5797576904296875,
"eval_runtime": 5.6099,
"eval_samples_per_second": 5.348,
"eval_steps_per_second": 0.357,
"step": 21
},
{
"epoch": 3.02,
"learning_rate": 4.9242424242424245e-05,
"loss": 2.2215,
"step": 25
},
{
"epoch": 3.03,
"eval_accuracy": 0.16666666666666666,
"eval_loss": 2.4757392406463623,
"eval_runtime": 5.5008,
"eval_samples_per_second": 5.454,
"eval_steps_per_second": 0.364,
"step": 28
},
{
"epoch": 4.01,
"learning_rate": 4.797979797979798e-05,
"loss": 1.8738,
"step": 30
},
{
"epoch": 4.03,
"learning_rate": 4.671717171717172e-05,
"loss": 1.7389,
"step": 35
},
{
"epoch": 4.03,
"eval_accuracy": 0.23333333333333334,
"eval_loss": 2.363579511642456,
"eval_runtime": 6.4613,
"eval_samples_per_second": 4.643,
"eval_steps_per_second": 0.31,
"step": 35
},
{
"epoch": 5.02,
"learning_rate": 4.545454545454546e-05,
"loss": 1.3366,
"step": 40
},
{
"epoch": 5.03,
"eval_accuracy": 0.3,
"eval_loss": 2.2424137592315674,
"eval_runtime": 5.7626,
"eval_samples_per_second": 5.206,
"eval_steps_per_second": 0.347,
"step": 42
},
{
"epoch": 6.01,
"learning_rate": 4.41919191919192e-05,
"loss": 1.1946,
"step": 45
},
{
"epoch": 6.03,
"eval_accuracy": 0.3,
"eval_loss": 2.167524814605713,
"eval_runtime": 5.8137,
"eval_samples_per_second": 5.16,
"eval_steps_per_second": 0.344,
"step": 49
},
{
"epoch": 7.0,
"learning_rate": 4.292929292929293e-05,
"loss": 0.8832,
"step": 50
},
{
"epoch": 7.03,
"learning_rate": 4.166666666666667e-05,
"loss": 0.6809,
"step": 55
},
{
"epoch": 7.03,
"eval_accuracy": 0.36666666666666664,
"eval_loss": 2.0548174381256104,
"eval_runtime": 5.6032,
"eval_samples_per_second": 5.354,
"eval_steps_per_second": 0.357,
"step": 56
},
{
"epoch": 8.02,
"learning_rate": 4.0404040404040405e-05,
"loss": 0.5255,
"step": 60
},
{
"epoch": 8.03,
"eval_accuracy": 0.4,
"eval_loss": 2.0410492420196533,
"eval_runtime": 5.8803,
"eval_samples_per_second": 5.102,
"eval_steps_per_second": 0.34,
"step": 63
},
{
"epoch": 9.01,
"learning_rate": 3.9141414141414145e-05,
"loss": 0.4759,
"step": 65
},
{
"epoch": 9.03,
"learning_rate": 3.787878787878788e-05,
"loss": 0.3285,
"step": 70
},
{
"epoch": 9.03,
"eval_accuracy": 0.4,
"eval_loss": 1.9539462327957153,
"eval_runtime": 6.0204,
"eval_samples_per_second": 4.983,
"eval_steps_per_second": 0.332,
"step": 70
},
{
"epoch": 10.02,
"learning_rate": 3.661616161616162e-05,
"loss": 0.2849,
"step": 75
},
{
"epoch": 10.03,
"eval_accuracy": 0.4666666666666667,
"eval_loss": 1.8536347150802612,
"eval_runtime": 5.2379,
"eval_samples_per_second": 5.727,
"eval_steps_per_second": 0.382,
"step": 77
},
{
"epoch": 11.01,
"learning_rate": 3.535353535353535e-05,
"loss": 0.1832,
"step": 80
},
{
"epoch": 11.03,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.8293204307556152,
"eval_runtime": 5.7575,
"eval_samples_per_second": 5.211,
"eval_steps_per_second": 0.347,
"step": 84
},
{
"epoch": 12.0,
"learning_rate": 3.409090909090909e-05,
"loss": 0.1485,
"step": 85
},
{
"epoch": 12.03,
"learning_rate": 3.282828282828283e-05,
"loss": 0.1307,
"step": 90
},
{
"epoch": 12.03,
"eval_accuracy": 0.4,
"eval_loss": 1.8200174570083618,
"eval_runtime": 5.6546,
"eval_samples_per_second": 5.305,
"eval_steps_per_second": 0.354,
"step": 91
},
{
"epoch": 13.02,
"learning_rate": 3.1565656565656566e-05,
"loss": 0.0901,
"step": 95
},
{
"epoch": 13.03,
"eval_accuracy": 0.4,
"eval_loss": 1.8354666233062744,
"eval_runtime": 5.7638,
"eval_samples_per_second": 5.205,
"eval_steps_per_second": 0.347,
"step": 98
},
{
"epoch": 14.01,
"learning_rate": 3.0303030303030306e-05,
"loss": 0.0757,
"step": 100
},
{
"epoch": 14.03,
"learning_rate": 2.904040404040404e-05,
"loss": 0.0636,
"step": 105
},
{
"epoch": 14.03,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.8200985193252563,
"eval_runtime": 5.0989,
"eval_samples_per_second": 5.884,
"eval_steps_per_second": 0.392,
"step": 105
},
{
"epoch": 15.02,
"learning_rate": 2.777777777777778e-05,
"loss": 0.0413,
"step": 110
},
{
"epoch": 15.03,
"eval_accuracy": 0.4666666666666667,
"eval_loss": 1.7749541997909546,
"eval_runtime": 5.2291,
"eval_samples_per_second": 5.737,
"eval_steps_per_second": 0.382,
"step": 112
},
{
"epoch": 16.01,
"learning_rate": 2.6515151515151516e-05,
"loss": 0.0427,
"step": 115
},
{
"epoch": 16.03,
"eval_accuracy": 0.5333333333333333,
"eval_loss": 1.745997667312622,
"eval_runtime": 5.2765,
"eval_samples_per_second": 5.686,
"eval_steps_per_second": 0.379,
"step": 119
},
{
"epoch": 17.0,
"learning_rate": 2.5252525252525256e-05,
"loss": 0.0369,
"step": 120
},
{
"epoch": 17.03,
"learning_rate": 2.398989898989899e-05,
"loss": 0.0254,
"step": 125
},
{
"epoch": 17.03,
"eval_accuracy": 0.5333333333333333,
"eval_loss": 1.7804018259048462,
"eval_runtime": 5.5848,
"eval_samples_per_second": 5.372,
"eval_steps_per_second": 0.358,
"step": 126
},
{
"epoch": 18.02,
"learning_rate": 2.272727272727273e-05,
"loss": 0.0203,
"step": 130
},
{
"epoch": 18.03,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.8868685960769653,
"eval_runtime": 5.6579,
"eval_samples_per_second": 5.302,
"eval_steps_per_second": 0.353,
"step": 133
},
{
"epoch": 19.01,
"learning_rate": 2.1464646464646466e-05,
"loss": 0.0231,
"step": 135
},
{
"epoch": 19.03,
"learning_rate": 2.0202020202020203e-05,
"loss": 0.0174,
"step": 140
},
{
"epoch": 19.03,
"eval_accuracy": 0.5666666666666667,
"eval_loss": 1.7740839719772339,
"eval_runtime": 5.4112,
"eval_samples_per_second": 5.544,
"eval_steps_per_second": 0.37,
"step": 140
},
{
"epoch": 20.02,
"learning_rate": 1.893939393939394e-05,
"loss": 0.0154,
"step": 145
},
{
"epoch": 20.03,
"eval_accuracy": 0.5333333333333333,
"eval_loss": 1.7400553226470947,
"eval_runtime": 5.673,
"eval_samples_per_second": 5.288,
"eval_steps_per_second": 0.353,
"step": 147
},
{
"epoch": 21.01,
"learning_rate": 1.7676767676767676e-05,
"loss": 0.0136,
"step": 150
},
{
"epoch": 21.03,
"eval_accuracy": 0.5,
"eval_loss": 1.7672396898269653,
"eval_runtime": 5.5489,
"eval_samples_per_second": 5.406,
"eval_steps_per_second": 0.36,
"step": 154
},
{
"epoch": 22.0,
"learning_rate": 1.6414141414141416e-05,
"loss": 0.0123,
"step": 155
},
{
"epoch": 22.03,
"learning_rate": 1.5151515151515153e-05,
"loss": 0.0116,
"step": 160
},
{
"epoch": 22.03,
"eval_accuracy": 0.5333333333333333,
"eval_loss": 1.7792834043502808,
"eval_runtime": 5.7051,
"eval_samples_per_second": 5.258,
"eval_steps_per_second": 0.351,
"step": 161
},
{
"epoch": 23.02,
"learning_rate": 1.388888888888889e-05,
"loss": 0.0123,
"step": 165
},
{
"epoch": 23.03,
"eval_accuracy": 0.4666666666666667,
"eval_loss": 1.8018161058425903,
"eval_runtime": 5.5773,
"eval_samples_per_second": 5.379,
"eval_steps_per_second": 0.359,
"step": 168
},
{
"epoch": 24.01,
"learning_rate": 1.2626262626262628e-05,
"loss": 0.0093,
"step": 170
},
{
"epoch": 24.03,
"learning_rate": 1.1363636363636365e-05,
"loss": 0.0102,
"step": 175
},
{
"epoch": 24.03,
"eval_accuracy": 0.5,
"eval_loss": 1.8023875951766968,
"eval_runtime": 5.4661,
"eval_samples_per_second": 5.488,
"eval_steps_per_second": 0.366,
"step": 175
},
{
"epoch": 25.02,
"learning_rate": 1.0101010101010101e-05,
"loss": 0.0103,
"step": 180
},
{
"epoch": 25.03,
"eval_accuracy": 0.5,
"eval_loss": 1.8057912588119507,
"eval_runtime": 5.5758,
"eval_samples_per_second": 5.38,
"eval_steps_per_second": 0.359,
"step": 182
},
{
"epoch": 26.01,
"learning_rate": 8.838383838383838e-06,
"loss": 0.0089,
"step": 185
},
{
"epoch": 26.03,
"eval_accuracy": 0.5,
"eval_loss": 1.810552954673767,
"eval_runtime": 5.6149,
"eval_samples_per_second": 5.343,
"eval_steps_per_second": 0.356,
"step": 189
},
{
"epoch": 27.0,
"learning_rate": 7.5757575757575764e-06,
"loss": 0.009,
"step": 190
},
{
"epoch": 27.03,
"learning_rate": 6.313131313131314e-06,
"loss": 0.0088,
"step": 195
},
{
"epoch": 27.03,
"eval_accuracy": 0.5,
"eval_loss": 1.8028618097305298,
"eval_runtime": 5.3559,
"eval_samples_per_second": 5.601,
"eval_steps_per_second": 0.373,
"step": 196
},
{
"epoch": 28.02,
"learning_rate": 5.050505050505051e-06,
"loss": 0.0092,
"step": 200
},
{
"epoch": 28.03,
"eval_accuracy": 0.5,
"eval_loss": 1.7960565090179443,
"eval_runtime": 5.2538,
"eval_samples_per_second": 5.71,
"eval_steps_per_second": 0.381,
"step": 203
},
{
"epoch": 29.01,
"learning_rate": 3.7878787878787882e-06,
"loss": 0.0082,
"step": 205
},
{
"epoch": 29.03,
"learning_rate": 2.5252525252525253e-06,
"loss": 0.0083,
"step": 210
},
{
"epoch": 29.03,
"eval_accuracy": 0.5,
"eval_loss": 1.7939893007278442,
"eval_runtime": 5.0992,
"eval_samples_per_second": 5.883,
"eval_steps_per_second": 0.392,
"step": 210
},
{
"epoch": 30.02,
"learning_rate": 1.2626262626262627e-06,
"loss": 0.0099,
"step": 215
},
{
"epoch": 30.03,
"eval_accuracy": 0.5,
"eval_loss": 1.7922049760818481,
"eval_runtime": 5.3556,
"eval_samples_per_second": 5.602,
"eval_steps_per_second": 0.373,
"step": 217
},
{
"epoch": 31.01,
"learning_rate": 0.0,
"loss": 0.0085,
"step": 220
},
{
"epoch": 31.01,
"eval_accuracy": 0.5,
"eval_loss": 1.7919764518737793,
"eval_runtime": 5.1737,
"eval_samples_per_second": 5.799,
"eval_steps_per_second": 0.387,
"step": 220
},
{
"epoch": 31.01,
"step": 220,
"total_flos": 1.5320910961010737e+19,
"train_loss": 0.5185655888847329,
"train_runtime": 1238.2272,
"train_samples_per_second": 2.843,
"train_steps_per_second": 0.178
},
{
"epoch": 31.01,
"eval_accuracy": 0.42857142857142855,
"eval_loss": 1.504156231880188,
"eval_runtime": 2.3481,
"eval_samples_per_second": 5.962,
"eval_steps_per_second": 0.426,
"step": 220
},
{
"epoch": 31.01,
"eval_accuracy": 0.42857142857142855,
"eval_loss": 1.504156231880188,
"eval_runtime": 2.2938,
"eval_samples_per_second": 6.104,
"eval_steps_per_second": 0.436,
"step": 220
}
],
"logging_steps": 5,
"max_steps": 220,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 1.5320910961010737e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}