{ "best_metric": 0.29478973150253296, "best_model_checkpoint": "./convnext-nano-3e-4-augment/checkpoint-2750", "epoch": 10.0, "eval_steps": 500, "global_step": 2750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "grad_norm": 17.436264038085938, "learning_rate": 0.00029902226030228247, "loss": 1.4532, "step": 100 }, { "epoch": 0.73, "grad_norm": 8.700443267822266, "learning_rate": 0.00029610178754135, "loss": 0.8119, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.827037773359841, "eval_loss": 0.5636539459228516, "eval_runtime": 61.1609, "eval_samples_per_second": 41.121, "eval_steps_per_second": 0.654, "step": 275 }, { "epoch": 1.09, "grad_norm": 19.5541934967041, "learning_rate": 0.0002912766545459287, "loss": 0.7009, "step": 300 }, { "epoch": 1.45, "grad_norm": 5.992691516876221, "learning_rate": 0.0002846097643037037, "loss": 0.5634, "step": 400 }, { "epoch": 1.82, "grad_norm": 10.371554374694824, "learning_rate": 0.0002761880299246772, "loss": 0.5829, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.8497017892644135, "eval_loss": 0.5015920400619507, "eval_runtime": 60.5306, "eval_samples_per_second": 41.549, "eval_steps_per_second": 0.661, "step": 550 }, { "epoch": 2.18, "grad_norm": 6.42030143737793, "learning_rate": 0.00026612124159586237, "loss": 0.5118, "step": 600 }, { "epoch": 2.55, "grad_norm": 5.376116752624512, "learning_rate": 0.00025454063529829405, "loss": 0.4771, "step": 700 }, { "epoch": 2.91, "grad_norm": 9.462407112121582, "learning_rate": 0.00024159718194531572, "loss": 0.4623, "step": 800 }, { "epoch": 3.0, "eval_accuracy": 0.8755467196819086, "eval_loss": 0.44941774010658264, "eval_runtime": 60.9271, "eval_samples_per_second": 41.279, "eval_steps_per_second": 0.657, "step": 825 }, { "epoch": 3.27, "grad_norm": 6.973038196563721, "learning_rate": 0.00022745961924584428, "loss": 0.3929, "step": 900 }, { "epoch": 3.64, "grad_norm": 6.476861953735352, "learning_rate": 0.00021231225195028297, "loss": 0.3711, "step": 1000 }, { "epoch": 4.0, "grad_norm": 6.22141170501709, "learning_rate": 0.0001963525491562421, "loss": 0.359, "step": 1100 }, { "epoch": 4.0, "eval_accuracy": 0.8886679920477137, "eval_loss": 0.3809404671192169, "eval_runtime": 61.1213, "eval_samples_per_second": 41.148, "eval_steps_per_second": 0.654, "step": 1100 }, { "epoch": 4.36, "grad_norm": 7.65138578414917, "learning_rate": 0.0001797885699968618, "loss": 0.3315, "step": 1200 }, { "epoch": 4.73, "grad_norm": 4.203142166137695, "learning_rate": 0.00016283625127182596, "loss": 0.2881, "step": 1300 }, { "epoch": 5.0, "eval_accuracy": 0.8998011928429424, "eval_loss": 0.3741509020328522, "eval_runtime": 61.4609, "eval_samples_per_second": 40.92, "eval_steps_per_second": 0.651, "step": 1375 }, { "epoch": 5.09, "grad_norm": 6.004030227661133, "learning_rate": 0.00014571659238094556, "loss": 0.286, "step": 1400 }, { "epoch": 5.45, "grad_norm": 6.018474578857422, "learning_rate": 0.00012865277425900724, "loss": 0.2292, "step": 1500 }, { "epoch": 5.82, "grad_norm": 4.614959716796875, "learning_rate": 0.00011186724987097698, "loss": 0.2302, "step": 1600 }, { "epoch": 6.0, "eval_accuracy": 0.9113320079522863, "eval_loss": 0.3401913046836853, "eval_runtime": 61.1216, "eval_samples_per_second": 41.147, "eval_steps_per_second": 0.654, "step": 1650 }, { "epoch": 6.18, "grad_norm": 4.227890968322754, "learning_rate": 9.557884419740386e-05, "loss": 0.2225, "step": 1700 }, { "epoch": 6.55, "grad_norm": 4.184410572052002, "learning_rate": 7.999990151614894e-05, "loss": 0.1851, "step": 1800 }, { "epoch": 6.91, "grad_norm": 6.824435234069824, "learning_rate": 6.533351716998465e-05, "loss": 0.1827, "step": 1900 }, { "epoch": 7.0, "eval_accuracy": 0.9121272365805169, "eval_loss": 0.31500810384750366, "eval_runtime": 60.793, "eval_samples_per_second": 41.37, "eval_steps_per_second": 0.658, "step": 1925 }, { "epoch": 7.27, "grad_norm": 3.374882221221924, "learning_rate": 5.1770889908207245e-05, "loss": 0.1551, "step": 2000 }, { "epoch": 7.64, "grad_norm": 4.39635705947876, "learning_rate": 3.948882931853924e-05, "loss": 0.1564, "step": 2100 }, { "epoch": 8.0, "grad_norm": 2.6348471641540527, "learning_rate": 2.8647450843757897e-05, "loss": 0.1466, "step": 2200 }, { "epoch": 8.0, "eval_accuracy": 0.9228628230616303, "eval_loss": 0.30119746923446655, "eval_runtime": 61.3308, "eval_samples_per_second": 41.007, "eval_steps_per_second": 0.652, "step": 2200 }, { "epoch": 8.36, "grad_norm": 5.662447929382324, "learning_rate": 1.9388088432033443e-05, "loss": 0.1236, "step": 2300 }, { "epoch": 8.73, "grad_norm": 0.9912707209587097, "learning_rate": 1.1831452032772498e-05, "loss": 0.1223, "step": 2400 }, { "epoch": 9.0, "eval_accuracy": 0.9248508946322067, "eval_loss": 0.2995615005493164, "eval_runtime": 60.6772, "eval_samples_per_second": 41.449, "eval_steps_per_second": 0.659, "step": 2475 }, { "epoch": 9.09, "grad_norm": 2.5069386959075928, "learning_rate": 6.076053957825411e-06, "loss": 0.1283, "step": 2500 }, { "epoch": 9.45, "grad_norm": 7.067609786987305, "learning_rate": 2.1969246228460523e-06, "loss": 0.1142, "step": 2600 }, { "epoch": 9.82, "grad_norm": 4.694984436035156, "learning_rate": 2.4463441107965276e-07, "loss": 0.1332, "step": 2700 }, { "epoch": 10.0, "eval_accuracy": 0.9248508946322067, "eval_loss": 0.29478973150253296, "eval_runtime": 61.4481, "eval_samples_per_second": 40.929, "eval_steps_per_second": 0.651, "step": 2750 }, { "epoch": 10.0, "step": 2750, "total_flos": 7.000491898906214e+18, "train_loss": 0.35536065743186257, "train_runtime": 6476.8205, "train_samples_per_second": 27.144, "train_steps_per_second": 0.425 } ], "logging_steps": 100, "max_steps": 2750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 7.000491898906214e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }