{ "best_metric": 0.7916666666666666, "best_model_checkpoint": "Human-action-convnext/checkpoint-234", "epoch": 2.9714285714285715, "eval_steps": 500, "global_step": 234, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "grad_norm": 2.9202792644500732, "learning_rate": 2.0833333333333336e-05, "loss": 0.985, "step": 10 }, { "epoch": 0.25, "grad_norm": 2.8895421028137207, "learning_rate": 4.166666666666667e-05, "loss": 0.9998, "step": 20 }, { "epoch": 0.38, "grad_norm": 3.1696712970733643, "learning_rate": 4.8571428571428576e-05, "loss": 0.9553, "step": 30 }, { "epoch": 0.51, "grad_norm": 3.572809934616089, "learning_rate": 4.6190476190476194e-05, "loss": 0.9258, "step": 40 }, { "epoch": 0.63, "grad_norm": 4.679945945739746, "learning_rate": 4.380952380952381e-05, "loss": 0.9274, "step": 50 }, { "epoch": 0.76, "grad_norm": 5.121299743652344, "learning_rate": 4.1428571428571437e-05, "loss": 0.8875, "step": 60 }, { "epoch": 0.89, "grad_norm": 4.941247940063477, "learning_rate": 3.904761904761905e-05, "loss": 0.9287, "step": 70 }, { "epoch": 0.99, "eval_accuracy": 0.7726190476190476, "eval_loss": 0.809448778629303, "eval_runtime": 31.5426, "eval_samples_per_second": 79.892, "eval_steps_per_second": 2.505, "step": 78 }, { "epoch": 1.02, "grad_norm": 3.490706205368042, "learning_rate": 3.6666666666666666e-05, "loss": 0.8684, "step": 80 }, { "epoch": 1.14, "grad_norm": 3.2836825847625732, "learning_rate": 3.428571428571429e-05, "loss": 0.8442, "step": 90 }, { "epoch": 1.27, "grad_norm": 3.463827610015869, "learning_rate": 3.19047619047619e-05, "loss": 0.8199, "step": 100 }, { "epoch": 1.4, "grad_norm": 4.105288982391357, "learning_rate": 2.9523809523809526e-05, "loss": 0.7896, "step": 110 }, { "epoch": 1.52, "grad_norm": 3.594646692276001, "learning_rate": 2.714285714285714e-05, "loss": 0.8322, "step": 120 }, { "epoch": 1.65, "grad_norm": 4.090346813201904, "learning_rate": 2.4761904761904762e-05, "loss": 0.7476, "step": 130 }, { "epoch": 1.78, "grad_norm": 4.468383312225342, "learning_rate": 2.2380952380952384e-05, "loss": 0.7725, "step": 140 }, { "epoch": 1.9, "grad_norm": 4.0550055503845215, "learning_rate": 2e-05, "loss": 0.8263, "step": 150 }, { "epoch": 1.99, "eval_accuracy": 0.7876984126984127, "eval_loss": 0.7395461201667786, "eval_runtime": 32.7967, "eval_samples_per_second": 76.837, "eval_steps_per_second": 2.409, "step": 157 }, { "epoch": 2.03, "grad_norm": 3.7557804584503174, "learning_rate": 1.761904761904762e-05, "loss": 0.7784, "step": 160 }, { "epoch": 2.16, "grad_norm": 3.753110647201538, "learning_rate": 1.5238095238095241e-05, "loss": 0.7744, "step": 170 }, { "epoch": 2.29, "grad_norm": 3.3746166229248047, "learning_rate": 1.2857142857142857e-05, "loss": 0.7588, "step": 180 }, { "epoch": 2.41, "grad_norm": 3.5075440406799316, "learning_rate": 1.0476190476190477e-05, "loss": 0.7543, "step": 190 }, { "epoch": 2.54, "grad_norm": 3.45804762840271, "learning_rate": 8.095238095238097e-06, "loss": 0.7179, "step": 200 }, { "epoch": 2.67, "grad_norm": 4.391757965087891, "learning_rate": 5.7142857142857145e-06, "loss": 0.665, "step": 210 }, { "epoch": 2.79, "grad_norm": 3.0836093425750732, "learning_rate": 3.3333333333333333e-06, "loss": 0.7787, "step": 220 }, { "epoch": 2.92, "grad_norm": 4.827792644500732, "learning_rate": 9.523809523809526e-07, "loss": 0.7472, "step": 230 }, { "epoch": 2.97, "eval_accuracy": 0.7916666666666666, "eval_loss": 0.7180711627006531, "eval_runtime": 32.3605, "eval_samples_per_second": 77.873, "eval_steps_per_second": 2.441, "step": 234 }, { "epoch": 2.97, "step": 234, "total_flos": 7.528934622159176e+17, "train_loss": 0.8272387634994637, "train_runtime": 368.3522, "train_samples_per_second": 82.095, "train_steps_per_second": 0.635 } ], "logging_steps": 10, "max_steps": 234, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7.528934622159176e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }