{ "best_metric": 0.8910891089108911, "best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-199", "epoch": 9.824561403508772, "eval_steps": 500, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3508771929824561, "grad_norm": 10.464795112609863, "learning_rate": 1.785714285714286e-05, "loss": 1.7823, "step": 10 }, { "epoch": 0.7017543859649122, "grad_norm": 9.879911422729492, "learning_rate": 3.571428571428572e-05, "loss": 1.4879, "step": 20 }, { "epoch": 0.9824561403508771, "eval_accuracy": 0.7326732673267327, "eval_loss": 0.9158226847648621, "eval_runtime": 116.3827, "eval_samples_per_second": 3.471, "eval_steps_per_second": 0.112, "step": 28 }, { "epoch": 1.0526315789473684, "grad_norm": 7.517858028411865, "learning_rate": 4.960317460317461e-05, "loss": 1.1987, "step": 30 }, { "epoch": 1.4035087719298245, "grad_norm": 7.376903533935547, "learning_rate": 4.761904761904762e-05, "loss": 0.8925, "step": 40 }, { "epoch": 1.7543859649122808, "grad_norm": 8.916149139404297, "learning_rate": 4.563492063492064e-05, "loss": 0.7072, "step": 50 }, { "epoch": 2.0, "eval_accuracy": 0.8366336633663366, "eval_loss": 0.4648021161556244, "eval_runtime": 4.3367, "eval_samples_per_second": 93.158, "eval_steps_per_second": 2.998, "step": 57 }, { "epoch": 2.1052631578947367, "grad_norm": 8.09045124053955, "learning_rate": 4.3650793650793655e-05, "loss": 0.665, "step": 60 }, { "epoch": 2.456140350877193, "grad_norm": 9.807299613952637, "learning_rate": 4.166666666666667e-05, "loss": 0.5713, "step": 70 }, { "epoch": 2.807017543859649, "grad_norm": 10.828336715698242, "learning_rate": 3.968253968253968e-05, "loss": 0.521, "step": 80 }, { "epoch": 2.982456140350877, "eval_accuracy": 0.8712871287128713, "eval_loss": 0.3816491961479187, "eval_runtime": 4.1894, "eval_samples_per_second": 96.434, "eval_steps_per_second": 3.103, "step": 85 }, { "epoch": 3.1578947368421053, "grad_norm": 6.668788433074951, "learning_rate": 3.76984126984127e-05, "loss": 0.5152, "step": 90 }, { "epoch": 3.5087719298245617, "grad_norm": 5.836483478546143, "learning_rate": 3.571428571428572e-05, "loss": 0.473, "step": 100 }, { "epoch": 3.8596491228070176, "grad_norm": 5.453497409820557, "learning_rate": 3.3730158730158734e-05, "loss": 0.4664, "step": 110 }, { "epoch": 4.0, "eval_accuracy": 0.8564356435643564, "eval_loss": 0.40334010124206543, "eval_runtime": 4.398, "eval_samples_per_second": 91.86, "eval_steps_per_second": 2.956, "step": 114 }, { "epoch": 4.2105263157894735, "grad_norm": 6.095137596130371, "learning_rate": 3.1746031746031745e-05, "loss": 0.3831, "step": 120 }, { "epoch": 4.56140350877193, "grad_norm": 6.058220386505127, "learning_rate": 2.9761904761904762e-05, "loss": 0.4376, "step": 130 }, { "epoch": 4.912280701754386, "grad_norm": 6.937771797180176, "learning_rate": 2.777777777777778e-05, "loss": 0.3944, "step": 140 }, { "epoch": 4.982456140350877, "eval_accuracy": 0.8737623762376238, "eval_loss": 0.3690718114376068, "eval_runtime": 4.2858, "eval_samples_per_second": 94.264, "eval_steps_per_second": 3.033, "step": 142 }, { "epoch": 5.2631578947368425, "grad_norm": 6.101373672485352, "learning_rate": 2.5793650793650796e-05, "loss": 0.3582, "step": 150 }, { "epoch": 5.614035087719298, "grad_norm": 8.85653305053711, "learning_rate": 2.380952380952381e-05, "loss": 0.3584, "step": 160 }, { "epoch": 5.964912280701754, "grad_norm": 7.283915996551514, "learning_rate": 2.1825396825396827e-05, "loss": 0.3627, "step": 170 }, { "epoch": 6.0, "eval_accuracy": 0.8886138613861386, "eval_loss": 0.3214375674724579, "eval_runtime": 4.5245, "eval_samples_per_second": 89.292, "eval_steps_per_second": 2.873, "step": 171 }, { "epoch": 6.315789473684211, "grad_norm": 5.079178810119629, "learning_rate": 1.984126984126984e-05, "loss": 0.3304, "step": 180 }, { "epoch": 6.666666666666667, "grad_norm": 5.342247486114502, "learning_rate": 1.785714285714286e-05, "loss": 0.3298, "step": 190 }, { "epoch": 6.982456140350877, "eval_accuracy": 0.8910891089108911, "eval_loss": 0.3172283470630646, "eval_runtime": 4.1397, "eval_samples_per_second": 97.593, "eval_steps_per_second": 3.14, "step": 199 }, { "epoch": 7.017543859649122, "grad_norm": 6.236889362335205, "learning_rate": 1.5873015873015872e-05, "loss": 0.342, "step": 200 }, { "epoch": 7.368421052631579, "grad_norm": 9.212471008300781, "learning_rate": 1.388888888888889e-05, "loss": 0.3288, "step": 210 }, { "epoch": 7.719298245614035, "grad_norm": 5.810153484344482, "learning_rate": 1.1904761904761905e-05, "loss": 0.3203, "step": 220 }, { "epoch": 8.0, "eval_accuracy": 0.8910891089108911, "eval_loss": 0.3060537278652191, "eval_runtime": 4.5988, "eval_samples_per_second": 87.848, "eval_steps_per_second": 2.827, "step": 228 }, { "epoch": 8.070175438596491, "grad_norm": 5.650562763214111, "learning_rate": 9.92063492063492e-06, "loss": 0.3379, "step": 230 }, { "epoch": 8.421052631578947, "grad_norm": 3.9896435737609863, "learning_rate": 7.936507936507936e-06, "loss": 0.2913, "step": 240 }, { "epoch": 8.771929824561404, "grad_norm": 7.3288397789001465, "learning_rate": 5.9523809523809525e-06, "loss": 0.2737, "step": 250 }, { "epoch": 8.982456140350877, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.3128886818885803, "eval_runtime": 4.2212, "eval_samples_per_second": 95.706, "eval_steps_per_second": 3.08, "step": 256 }, { "epoch": 9.12280701754386, "grad_norm": 5.5674285888671875, "learning_rate": 3.968253968253968e-06, "loss": 0.2991, "step": 260 }, { "epoch": 9.473684210526315, "grad_norm": 6.089910984039307, "learning_rate": 1.984126984126984e-06, "loss": 0.2943, "step": 270 }, { "epoch": 9.824561403508772, "grad_norm": 5.232763290405273, "learning_rate": 0.0, "loss": 0.2728, "step": 280 }, { "epoch": 9.824561403508772, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.308758020401001, "eval_runtime": 4.3884, "eval_samples_per_second": 92.061, "eval_steps_per_second": 2.962, "step": 280 }, { "epoch": 9.824561403508772, "step": 280, "total_flos": 8.878060093031055e+17, "train_loss": 0.5355421313217708, "train_runtime": 1702.5623, "train_samples_per_second": 21.344, "train_steps_per_second": 0.164 } ], "logging_steps": 10, "max_steps": 280, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 8.878060093031055e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }