{ "best_metric": 0.793597304128054, "best_model_checkpoint": "MCG-NJU/videomae-base-finetuned-TikHaram/checkpoint-120", "epoch": 9.084745762711865, "eval_steps": 500, "global_step": 295, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03389830508474576, "grad_norm": 5.0345001220703125, "learning_rate": 1.6666666666666667e-05, "loss": 1.3726, "step": 10 }, { "epoch": 0.06779661016949153, "grad_norm": 6.756855487823486, "learning_rate": 3.3333333333333335e-05, "loss": 1.3354, "step": 20 }, { "epoch": 0.1016949152542373, "grad_norm": 10.93206787109375, "learning_rate": 5e-05, "loss": 1.1179, "step": 30 }, { "epoch": 0.1016949152542373, "eval_accuracy": 0.6857624262847515, "eval_loss": 0.9409521222114563, "eval_runtime": 1237.8509, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.12, "step": 30 }, { "epoch": 1.0338983050847457, "grad_norm": 7.3327813148498535, "learning_rate": 4.811320754716982e-05, "loss": 0.8248, "step": 40 }, { "epoch": 1.0677966101694916, "grad_norm": 10.208086967468262, "learning_rate": 4.6226415094339625e-05, "loss": 0.759, "step": 50 }, { "epoch": 1.1016949152542372, "grad_norm": 2.690416097640991, "learning_rate": 4.433962264150944e-05, "loss": 0.6162, "step": 60 }, { "epoch": 1.1016949152542372, "eval_accuracy": 0.7497893850042123, "eval_loss": 0.6875647902488708, "eval_runtime": 1167.6728, "eval_samples_per_second": 1.017, "eval_steps_per_second": 0.128, "step": 60 }, { "epoch": 2.0338983050847457, "grad_norm": 8.078776359558105, "learning_rate": 4.245283018867925e-05, "loss": 0.5685, "step": 70 }, { "epoch": 2.0677966101694913, "grad_norm": 4.468865871429443, "learning_rate": 4.0566037735849064e-05, "loss": 0.4638, "step": 80 }, { "epoch": 2.1016949152542375, "grad_norm": 4.634223937988281, "learning_rate": 3.867924528301887e-05, "loss": 0.5604, "step": 90 }, { "epoch": 2.1016949152542375, "eval_accuracy": 0.7363100252737995, "eval_loss": 0.6748067140579224, "eval_runtime": 1179.8147, "eval_samples_per_second": 1.006, "eval_steps_per_second": 0.126, "step": 90 }, { "epoch": 3.0338983050847457, "grad_norm": 1.432814359664917, "learning_rate": 3.679245283018868e-05, "loss": 0.4324, "step": 100 }, { "epoch": 3.0677966101694913, "grad_norm": 8.20600700378418, "learning_rate": 3.490566037735849e-05, "loss": 0.3613, "step": 110 }, { "epoch": 3.1016949152542375, "grad_norm": 5.8781514167785645, "learning_rate": 3.30188679245283e-05, "loss": 0.3628, "step": 120 }, { "epoch": 3.1016949152542375, "eval_accuracy": 0.793597304128054, "eval_loss": 0.6228342056274414, "eval_runtime": 1039.6898, "eval_samples_per_second": 1.142, "eval_steps_per_second": 0.143, "step": 120 }, { "epoch": 4.033898305084746, "grad_norm": 5.754726886749268, "learning_rate": 3.113207547169811e-05, "loss": 0.2648, "step": 130 }, { "epoch": 4.067796610169491, "grad_norm": 4.939706325531006, "learning_rate": 2.9245283018867926e-05, "loss": 0.2436, "step": 140 }, { "epoch": 4.101694915254237, "grad_norm": 1.7757413387298584, "learning_rate": 2.7358490566037738e-05, "loss": 0.2861, "step": 150 }, { "epoch": 4.101694915254237, "eval_accuracy": 0.7422072451558551, "eval_loss": 0.7411791682243347, "eval_runtime": 1006.166, "eval_samples_per_second": 1.18, "eval_steps_per_second": 0.148, "step": 150 }, { "epoch": 5.033898305084746, "grad_norm": 9.85596752166748, "learning_rate": 2.547169811320755e-05, "loss": 0.1681, "step": 160 }, { "epoch": 5.067796610169491, "grad_norm": 5.040333271026611, "learning_rate": 2.358490566037736e-05, "loss": 0.3501, "step": 170 }, { "epoch": 5.101694915254237, "grad_norm": 7.130555629730225, "learning_rate": 2.1698113207547172e-05, "loss": 0.213, "step": 180 }, { "epoch": 5.101694915254237, "eval_accuracy": 0.7666385846672283, "eval_loss": 0.6355797052383423, "eval_runtime": 1155.5919, "eval_samples_per_second": 1.027, "eval_steps_per_second": 0.129, "step": 180 }, { "epoch": 6.033898305084746, "grad_norm": 3.2904608249664307, "learning_rate": 1.9811320754716984e-05, "loss": 0.1472, "step": 190 }, { "epoch": 6.067796610169491, "grad_norm": 2.7492494583129883, "learning_rate": 1.7924528301886792e-05, "loss": 0.1845, "step": 200 }, { "epoch": 6.101694915254237, "grad_norm": 1.3256348371505737, "learning_rate": 1.6037735849056604e-05, "loss": 0.1428, "step": 210 }, { "epoch": 6.101694915254237, "eval_accuracy": 0.7590564448188711, "eval_loss": 0.6791483163833618, "eval_runtime": 1101.1588, "eval_samples_per_second": 1.078, "eval_steps_per_second": 0.135, "step": 210 }, { "epoch": 7.033898305084746, "grad_norm": 1.3044459819793701, "learning_rate": 1.4150943396226415e-05, "loss": 0.1269, "step": 220 }, { "epoch": 7.067796610169491, "grad_norm": 1.1943762302398682, "learning_rate": 1.2264150943396227e-05, "loss": 0.057, "step": 230 }, { "epoch": 7.101694915254237, "grad_norm": 20.165433883666992, "learning_rate": 1.0377358490566038e-05, "loss": 0.1536, "step": 240 }, { "epoch": 7.101694915254237, "eval_accuracy": 0.7523167649536647, "eval_loss": 0.7612273097038269, "eval_runtime": 1207.276, "eval_samples_per_second": 0.983, "eval_steps_per_second": 0.123, "step": 240 }, { "epoch": 8.033898305084746, "grad_norm": 0.7759658694267273, "learning_rate": 8.49056603773585e-06, "loss": 0.0919, "step": 250 }, { "epoch": 8.067796610169491, "grad_norm": 0.4191964268684387, "learning_rate": 6.60377358490566e-06, "loss": 0.2008, "step": 260 }, { "epoch": 8.101694915254237, "grad_norm": 0.7120205760002136, "learning_rate": 4.716981132075472e-06, "loss": 0.1594, "step": 270 }, { "epoch": 8.101694915254237, "eval_accuracy": 0.7624262847514743, "eval_loss": 0.7678460478782654, "eval_runtime": 1195.7328, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.125, "step": 270 }, { "epoch": 9.033898305084746, "grad_norm": 3.204578161239624, "learning_rate": 2.830188679245283e-06, "loss": 0.1085, "step": 280 }, { "epoch": 9.067796610169491, "grad_norm": 2.541273355484009, "learning_rate": 9.433962264150943e-07, "loss": 0.1056, "step": 290 }, { "epoch": 9.084745762711865, "eval_accuracy": 0.7657961246840775, "eval_loss": 0.7785321474075317, "eval_runtime": 1147.1184, "eval_samples_per_second": 1.035, "eval_steps_per_second": 0.13, "step": 295 }, { "epoch": 9.084745762711865, "step": 295, "total_flos": 2.8959057571998597e+18, "train_loss": 0.4020373065592879, "train_runtime": 14069.0887, "train_samples_per_second": 0.168, "train_steps_per_second": 0.021 }, { "epoch": 9.084745762711865, "eval_accuracy": 0.6756505576208178, "eval_loss": 0.875449001789093, "eval_runtime": 932.4911, "eval_samples_per_second": 1.154, "eval_steps_per_second": 0.145, "step": 295 }, { "epoch": 9.084745762711865, "eval_accuracy": 0.6756505576208178, "eval_loss": 0.8754490613937378, "eval_runtime": 924.3, "eval_samples_per_second": 1.164, "eval_steps_per_second": 0.146, "step": 295 } ], "logging_steps": 10, "max_steps": 295, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8959057571998597e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }