{ "best_metric": 0.7738095238095238, "best_model_checkpoint": "Human-action-swin/checkpoint-234", "epoch": 2.9714285714285715, "eval_steps": 500, "global_step": 234, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "grad_norm": 8.177011489868164, "learning_rate": 2.0833333333333336e-05, "loss": 2.8077, "step": 10 }, { "epoch": 0.25, "grad_norm": 7.242084980010986, "learning_rate": 4.166666666666667e-05, "loss": 2.6464, "step": 20 }, { "epoch": 0.38, "grad_norm": 6.426877498626709, "learning_rate": 4.8571428571428576e-05, "loss": 2.402, "step": 30 }, { "epoch": 0.51, "grad_norm": 12.246893882751465, "learning_rate": 4.6190476190476194e-05, "loss": 2.0132, "step": 40 }, { "epoch": 0.63, "grad_norm": 20.22879981994629, "learning_rate": 4.380952380952381e-05, "loss": 1.6849, "step": 50 }, { "epoch": 0.76, "grad_norm": 11.127490043640137, "learning_rate": 4.1428571428571437e-05, "loss": 1.5244, "step": 60 }, { "epoch": 0.89, "grad_norm": 8.744277954101562, "learning_rate": 3.904761904761905e-05, "loss": 1.4273, "step": 70 }, { "epoch": 0.99, "eval_accuracy": 0.671031746031746, "eval_loss": 1.046990990638733, "eval_runtime": 38.8701, "eval_samples_per_second": 64.831, "eval_steps_per_second": 2.032, "step": 78 }, { "epoch": 1.02, "grad_norm": 7.494091033935547, "learning_rate": 3.6666666666666666e-05, "loss": 1.3184, "step": 80 }, { "epoch": 1.14, "grad_norm": 6.497593879699707, "learning_rate": 3.428571428571429e-05, "loss": 1.2244, "step": 90 }, { "epoch": 1.27, "grad_norm": 10.031980514526367, "learning_rate": 3.19047619047619e-05, "loss": 1.2127, "step": 100 }, { "epoch": 1.4, "grad_norm": 8.93614673614502, "learning_rate": 2.9523809523809526e-05, "loss": 1.1732, "step": 110 }, { "epoch": 1.52, "grad_norm": 8.977005958557129, "learning_rate": 2.714285714285714e-05, "loss": 1.1427, "step": 120 }, { "epoch": 1.65, "grad_norm": 8.760551452636719, "learning_rate": 2.4761904761904762e-05, "loss": 1.1373, "step": 130 }, { "epoch": 1.78, "grad_norm": 8.87807846069336, "learning_rate": 2.2380952380952384e-05, "loss": 1.1024, "step": 140 }, { "epoch": 1.9, "grad_norm": 7.670358657836914, "learning_rate": 2e-05, "loss": 1.0813, "step": 150 }, { "epoch": 1.99, "eval_accuracy": 0.7511904761904762, "eval_loss": 0.7847604155540466, "eval_runtime": 40.8525, "eval_samples_per_second": 61.685, "eval_steps_per_second": 1.934, "step": 157 }, { "epoch": 2.03, "grad_norm": 9.62759780883789, "learning_rate": 1.761904761904762e-05, "loss": 1.0885, "step": 160 }, { "epoch": 2.16, "grad_norm": 8.1283540725708, "learning_rate": 1.5238095238095241e-05, "loss": 1.0769, "step": 170 }, { "epoch": 2.29, "grad_norm": 8.88747501373291, "learning_rate": 1.2857142857142857e-05, "loss": 1.0129, "step": 180 }, { "epoch": 2.41, "grad_norm": 9.25979995727539, "learning_rate": 1.0476190476190477e-05, "loss": 1.0048, "step": 190 }, { "epoch": 2.54, "grad_norm": 7.371565818786621, "learning_rate": 8.095238095238097e-06, "loss": 0.9917, "step": 200 }, { "epoch": 2.67, "grad_norm": 6.723045349121094, "learning_rate": 5.7142857142857145e-06, "loss": 0.9229, "step": 210 }, { "epoch": 2.79, "grad_norm": 7.953981399536133, "learning_rate": 3.3333333333333333e-06, "loss": 1.0518, "step": 220 }, { "epoch": 2.92, "grad_norm": 7.315853118896484, "learning_rate": 9.523809523809526e-07, "loss": 0.9644, "step": 230 }, { "epoch": 2.97, "eval_accuracy": 0.7738095238095238, "eval_loss": 0.7221077084541321, "eval_runtime": 40.8941, "eval_samples_per_second": 61.623, "eval_steps_per_second": 1.932, "step": 234 }, { "epoch": 2.97, "step": 234, "total_flos": 7.447570178286551e+17, "train_loss": 1.3846357653283665, "train_runtime": 446.4628, "train_samples_per_second": 67.732, "train_steps_per_second": 0.524 } ], "logging_steps": 10, "max_steps": 234, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7.447570178286551e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }