|
{ |
|
"best_metric": 0.8910891089108911, |
|
"best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-199", |
|
"epoch": 9.824561403508772, |
|
"eval_steps": 500, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 10.464795112609863, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.7823, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 9.879911422729492, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.4879, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9824561403508771, |
|
"eval_accuracy": 0.7326732673267327, |
|
"eval_loss": 0.9158226847648621, |
|
"eval_runtime": 116.3827, |
|
"eval_samples_per_second": 3.471, |
|
"eval_steps_per_second": 0.112, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 7.517858028411865, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 1.1987, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 7.376903533935547, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.8925, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 8.916149139404297, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 0.7072, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8366336633663366, |
|
"eval_loss": 0.4648021161556244, |
|
"eval_runtime": 4.3367, |
|
"eval_samples_per_second": 93.158, |
|
"eval_steps_per_second": 2.998, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 8.09045124053955, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.665, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 9.807299613952637, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5713, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 10.828336715698242, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.521, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"eval_accuracy": 0.8712871287128713, |
|
"eval_loss": 0.3816491961479187, |
|
"eval_runtime": 4.1894, |
|
"eval_samples_per_second": 96.434, |
|
"eval_steps_per_second": 3.103, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 6.668788433074951, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 0.5152, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 5.836483478546143, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.473, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 5.453497409820557, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 0.4664, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8564356435643564, |
|
"eval_loss": 0.40334010124206543, |
|
"eval_runtime": 4.398, |
|
"eval_samples_per_second": 91.86, |
|
"eval_steps_per_second": 2.956, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 6.095137596130371, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.3831, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 6.058220386505127, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 0.4376, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 6.937771797180176, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.3944, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.982456140350877, |
|
"eval_accuracy": 0.8737623762376238, |
|
"eval_loss": 0.3690718114376068, |
|
"eval_runtime": 4.2858, |
|
"eval_samples_per_second": 94.264, |
|
"eval_steps_per_second": 3.033, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 6.101373672485352, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 0.3582, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 8.85653305053711, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.3584, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 7.283915996551514, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 0.3627, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8886138613861386, |
|
"eval_loss": 0.3214375674724579, |
|
"eval_runtime": 4.5245, |
|
"eval_samples_per_second": 89.292, |
|
"eval_steps_per_second": 2.873, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 5.079178810119629, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.3304, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 5.342247486114502, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.3298, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.982456140350877, |
|
"eval_accuracy": 0.8910891089108911, |
|
"eval_loss": 0.3172283470630646, |
|
"eval_runtime": 4.1397, |
|
"eval_samples_per_second": 97.593, |
|
"eval_steps_per_second": 3.14, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 6.236889362335205, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.342, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 9.212471008300781, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.3288, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 5.810153484344482, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.3203, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8910891089108911, |
|
"eval_loss": 0.3060537278652191, |
|
"eval_runtime": 4.5988, |
|
"eval_samples_per_second": 87.848, |
|
"eval_steps_per_second": 2.827, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 5.650562763214111, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.3379, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 3.9896435737609863, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.2913, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 7.3288397789001465, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.2737, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.982456140350877, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.3128886818885803, |
|
"eval_runtime": 4.2212, |
|
"eval_samples_per_second": 95.706, |
|
"eval_steps_per_second": 3.08, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 5.5674285888671875, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.2991, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 6.089910984039307, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.2943, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 5.232763290405273, |
|
"learning_rate": 0.0, |
|
"loss": 0.2728, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.308758020401001, |
|
"eval_runtime": 4.3884, |
|
"eval_samples_per_second": 92.061, |
|
"eval_steps_per_second": 2.962, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"step": 280, |
|
"total_flos": 8.878060093031055e+17, |
|
"train_loss": 0.5355421313217708, |
|
"train_runtime": 1702.5623, |
|
"train_samples_per_second": 21.344, |
|
"train_steps_per_second": 0.164 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 8.878060093031055e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|