{ "best_metric": 0.2866075932979584, "best_model_checkpoint": "./convnext-nano-1e-4-augment/checkpoint-2750", "epoch": 10.0, "eval_steps": 500, "global_step": 2750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "grad_norm": 11.396906852722168, "learning_rate": 9.967408676742751e-05, "loss": 1.7369, "step": 100 }, { "epoch": 0.73, "grad_norm": 14.72342300415039, "learning_rate": 9.870059584711668e-05, "loss": 0.8282, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8536779324055666, "eval_loss": 0.5148155689239502, "eval_runtime": 61.4101, "eval_samples_per_second": 40.954, "eval_steps_per_second": 0.651, "step": 275 }, { "epoch": 1.09, "grad_norm": 16.179250717163086, "learning_rate": 9.709221818197624e-05, "loss": 0.6581, "step": 300 }, { "epoch": 1.45, "grad_norm": 10.215620994567871, "learning_rate": 9.486992143456792e-05, "loss": 0.5141, "step": 400 }, { "epoch": 1.82, "grad_norm": 8.687200546264648, "learning_rate": 9.206267664155907e-05, "loss": 0.5209, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.88389662027833, "eval_loss": 0.4150846004486084, "eval_runtime": 61.5346, "eval_samples_per_second": 40.871, "eval_steps_per_second": 0.65, "step": 550 }, { "epoch": 2.18, "grad_norm": 10.711565971374512, "learning_rate": 8.870708053195413e-05, "loss": 0.4456, "step": 600 }, { "epoch": 2.55, "grad_norm": 9.642666816711426, "learning_rate": 8.484687843276469e-05, "loss": 0.4067, "step": 700 }, { "epoch": 2.91, "grad_norm": 11.507894515991211, "learning_rate": 8.053239398177191e-05, "loss": 0.3867, "step": 800 }, { "epoch": 3.0, "eval_accuracy": 0.9009940357852882, "eval_loss": 0.3642739951610565, "eval_runtime": 61.3025, "eval_samples_per_second": 41.026, "eval_steps_per_second": 0.653, "step": 825 }, { "epoch": 3.27, "grad_norm": 13.279052734375, "learning_rate": 7.58198730819481e-05, "loss": 0.3452, "step": 900 }, { "epoch": 3.64, "grad_norm": 9.233952522277832, "learning_rate": 7.077075065009433e-05, "loss": 0.3216, "step": 1000 }, { "epoch": 4.0, "grad_norm": 9.883641242980957, "learning_rate": 6.545084971874738e-05, "loss": 0.3183, "step": 1100 }, { "epoch": 4.0, "eval_accuracy": 0.9049701789264414, "eval_loss": 0.3240828812122345, "eval_runtime": 61.2041, "eval_samples_per_second": 41.092, "eval_steps_per_second": 0.654, "step": 1100 }, { "epoch": 4.36, "grad_norm": 10.317498207092285, "learning_rate": 5.992952333228728e-05, "loss": 0.2789, "step": 1200 }, { "epoch": 4.73, "grad_norm": 6.958377838134766, "learning_rate": 5.427875042394199e-05, "loss": 0.2679, "step": 1300 }, { "epoch": 5.0, "eval_accuracy": 0.904572564612326, "eval_loss": 0.32900604605674744, "eval_runtime": 61.4874, "eval_samples_per_second": 40.903, "eval_steps_per_second": 0.651, "step": 1375 }, { "epoch": 5.09, "grad_norm": 7.9765119552612305, "learning_rate": 4.85721974603152e-05, "loss": 0.261, "step": 1400 }, { "epoch": 5.45, "grad_norm": 6.655781269073486, "learning_rate": 4.288425808633575e-05, "loss": 0.2243, "step": 1500 }, { "epoch": 5.82, "grad_norm": 9.778780937194824, "learning_rate": 3.728908329032567e-05, "loss": 0.2364, "step": 1600 }, { "epoch": 6.0, "eval_accuracy": 0.9137176938369781, "eval_loss": 0.30884459614753723, "eval_runtime": 61.4411, "eval_samples_per_second": 40.934, "eval_steps_per_second": 0.651, "step": 1650 }, { "epoch": 6.18, "grad_norm": 6.904848098754883, "learning_rate": 3.1859614732467954e-05, "loss": 0.2176, "step": 1700 }, { "epoch": 6.55, "grad_norm": 6.360904216766357, "learning_rate": 2.6666633838716314e-05, "loss": 0.1943, "step": 1800 }, { "epoch": 6.91, "grad_norm": 9.572250366210938, "learning_rate": 2.1777839056661554e-05, "loss": 0.1981, "step": 1900 }, { "epoch": 7.0, "eval_accuracy": 0.9137176938369781, "eval_loss": 0.29816073179244995, "eval_runtime": 62.0018, "eval_samples_per_second": 40.563, "eval_steps_per_second": 0.645, "step": 1925 }, { "epoch": 7.27, "grad_norm": 4.384122371673584, "learning_rate": 1.725696330273575e-05, "loss": 0.1778, "step": 2000 }, { "epoch": 7.64, "grad_norm": 7.344178199768066, "learning_rate": 1.3162943106179749e-05, "loss": 0.1692, "step": 2100 }, { "epoch": 8.0, "grad_norm": 5.066224575042725, "learning_rate": 9.549150281252633e-06, "loss": 0.1704, "step": 2200 }, { "epoch": 8.0, "eval_accuracy": 0.9168986083499006, "eval_loss": 0.2899409532546997, "eval_runtime": 61.3116, "eval_samples_per_second": 41.02, "eval_steps_per_second": 0.652, "step": 2200 }, { "epoch": 8.36, "grad_norm": 7.729918003082275, "learning_rate": 6.462696144011149e-06, "loss": 0.1627, "step": 2300 }, { "epoch": 8.73, "grad_norm": 9.76986312866211, "learning_rate": 3.9438173442575e-06, "loss": 0.1572, "step": 2400 }, { "epoch": 9.0, "eval_accuracy": 0.920079522862823, "eval_loss": 0.2868165671825409, "eval_runtime": 61.3101, "eval_samples_per_second": 41.021, "eval_steps_per_second": 0.652, "step": 2475 }, { "epoch": 9.09, "grad_norm": 7.546455383300781, "learning_rate": 2.0253513192751373e-06, "loss": 0.1548, "step": 2500 }, { "epoch": 9.45, "grad_norm": 8.111244201660156, "learning_rate": 7.323082076153509e-07, "loss": 0.1519, "step": 2600 }, { "epoch": 9.82, "grad_norm": 8.917200088500977, "learning_rate": 8.15448036932176e-08, "loss": 0.168, "step": 2700 }, { "epoch": 10.0, "eval_accuracy": 0.9204771371769384, "eval_loss": 0.2866075932979584, "eval_runtime": 61.2534, "eval_samples_per_second": 41.059, "eval_steps_per_second": 0.653, "step": 2750 }, { "epoch": 10.0, "step": 2750, "total_flos": 7.000491898906214e+18, "train_loss": 0.3542752295407382, "train_runtime": 6594.6916, "train_samples_per_second": 26.659, "train_steps_per_second": 0.417 } ], "logging_steps": 100, "max_steps": 2750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 7.000491898906214e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }