Safawat's picture
End of training
056dde1 verified
raw
history blame
No virus
8.09 kB
{
"best_metric": 0.8910891089108911,
"best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-199",
"epoch": 9.824561403508772,
"eval_steps": 500,
"global_step": 280,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3508771929824561,
"grad_norm": 10.464795112609863,
"learning_rate": 1.785714285714286e-05,
"loss": 1.7823,
"step": 10
},
{
"epoch": 0.7017543859649122,
"grad_norm": 9.879911422729492,
"learning_rate": 3.571428571428572e-05,
"loss": 1.4879,
"step": 20
},
{
"epoch": 0.9824561403508771,
"eval_accuracy": 0.7326732673267327,
"eval_loss": 0.9158226847648621,
"eval_runtime": 116.3827,
"eval_samples_per_second": 3.471,
"eval_steps_per_second": 0.112,
"step": 28
},
{
"epoch": 1.0526315789473684,
"grad_norm": 7.517858028411865,
"learning_rate": 4.960317460317461e-05,
"loss": 1.1987,
"step": 30
},
{
"epoch": 1.4035087719298245,
"grad_norm": 7.376903533935547,
"learning_rate": 4.761904761904762e-05,
"loss": 0.8925,
"step": 40
},
{
"epoch": 1.7543859649122808,
"grad_norm": 8.916149139404297,
"learning_rate": 4.563492063492064e-05,
"loss": 0.7072,
"step": 50
},
{
"epoch": 2.0,
"eval_accuracy": 0.8366336633663366,
"eval_loss": 0.4648021161556244,
"eval_runtime": 4.3367,
"eval_samples_per_second": 93.158,
"eval_steps_per_second": 2.998,
"step": 57
},
{
"epoch": 2.1052631578947367,
"grad_norm": 8.09045124053955,
"learning_rate": 4.3650793650793655e-05,
"loss": 0.665,
"step": 60
},
{
"epoch": 2.456140350877193,
"grad_norm": 9.807299613952637,
"learning_rate": 4.166666666666667e-05,
"loss": 0.5713,
"step": 70
},
{
"epoch": 2.807017543859649,
"grad_norm": 10.828336715698242,
"learning_rate": 3.968253968253968e-05,
"loss": 0.521,
"step": 80
},
{
"epoch": 2.982456140350877,
"eval_accuracy": 0.8712871287128713,
"eval_loss": 0.3816491961479187,
"eval_runtime": 4.1894,
"eval_samples_per_second": 96.434,
"eval_steps_per_second": 3.103,
"step": 85
},
{
"epoch": 3.1578947368421053,
"grad_norm": 6.668788433074951,
"learning_rate": 3.76984126984127e-05,
"loss": 0.5152,
"step": 90
},
{
"epoch": 3.5087719298245617,
"grad_norm": 5.836483478546143,
"learning_rate": 3.571428571428572e-05,
"loss": 0.473,
"step": 100
},
{
"epoch": 3.8596491228070176,
"grad_norm": 5.453497409820557,
"learning_rate": 3.3730158730158734e-05,
"loss": 0.4664,
"step": 110
},
{
"epoch": 4.0,
"eval_accuracy": 0.8564356435643564,
"eval_loss": 0.40334010124206543,
"eval_runtime": 4.398,
"eval_samples_per_second": 91.86,
"eval_steps_per_second": 2.956,
"step": 114
},
{
"epoch": 4.2105263157894735,
"grad_norm": 6.095137596130371,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.3831,
"step": 120
},
{
"epoch": 4.56140350877193,
"grad_norm": 6.058220386505127,
"learning_rate": 2.9761904761904762e-05,
"loss": 0.4376,
"step": 130
},
{
"epoch": 4.912280701754386,
"grad_norm": 6.937771797180176,
"learning_rate": 2.777777777777778e-05,
"loss": 0.3944,
"step": 140
},
{
"epoch": 4.982456140350877,
"eval_accuracy": 0.8737623762376238,
"eval_loss": 0.3690718114376068,
"eval_runtime": 4.2858,
"eval_samples_per_second": 94.264,
"eval_steps_per_second": 3.033,
"step": 142
},
{
"epoch": 5.2631578947368425,
"grad_norm": 6.101373672485352,
"learning_rate": 2.5793650793650796e-05,
"loss": 0.3582,
"step": 150
},
{
"epoch": 5.614035087719298,
"grad_norm": 8.85653305053711,
"learning_rate": 2.380952380952381e-05,
"loss": 0.3584,
"step": 160
},
{
"epoch": 5.964912280701754,
"grad_norm": 7.283915996551514,
"learning_rate": 2.1825396825396827e-05,
"loss": 0.3627,
"step": 170
},
{
"epoch": 6.0,
"eval_accuracy": 0.8886138613861386,
"eval_loss": 0.3214375674724579,
"eval_runtime": 4.5245,
"eval_samples_per_second": 89.292,
"eval_steps_per_second": 2.873,
"step": 171
},
{
"epoch": 6.315789473684211,
"grad_norm": 5.079178810119629,
"learning_rate": 1.984126984126984e-05,
"loss": 0.3304,
"step": 180
},
{
"epoch": 6.666666666666667,
"grad_norm": 5.342247486114502,
"learning_rate": 1.785714285714286e-05,
"loss": 0.3298,
"step": 190
},
{
"epoch": 6.982456140350877,
"eval_accuracy": 0.8910891089108911,
"eval_loss": 0.3172283470630646,
"eval_runtime": 4.1397,
"eval_samples_per_second": 97.593,
"eval_steps_per_second": 3.14,
"step": 199
},
{
"epoch": 7.017543859649122,
"grad_norm": 6.236889362335205,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.342,
"step": 200
},
{
"epoch": 7.368421052631579,
"grad_norm": 9.212471008300781,
"learning_rate": 1.388888888888889e-05,
"loss": 0.3288,
"step": 210
},
{
"epoch": 7.719298245614035,
"grad_norm": 5.810153484344482,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.3203,
"step": 220
},
{
"epoch": 8.0,
"eval_accuracy": 0.8910891089108911,
"eval_loss": 0.3060537278652191,
"eval_runtime": 4.5988,
"eval_samples_per_second": 87.848,
"eval_steps_per_second": 2.827,
"step": 228
},
{
"epoch": 8.070175438596491,
"grad_norm": 5.650562763214111,
"learning_rate": 9.92063492063492e-06,
"loss": 0.3379,
"step": 230
},
{
"epoch": 8.421052631578947,
"grad_norm": 3.9896435737609863,
"learning_rate": 7.936507936507936e-06,
"loss": 0.2913,
"step": 240
},
{
"epoch": 8.771929824561404,
"grad_norm": 7.3288397789001465,
"learning_rate": 5.9523809523809525e-06,
"loss": 0.2737,
"step": 250
},
{
"epoch": 8.982456140350877,
"eval_accuracy": 0.8861386138613861,
"eval_loss": 0.3128886818885803,
"eval_runtime": 4.2212,
"eval_samples_per_second": 95.706,
"eval_steps_per_second": 3.08,
"step": 256
},
{
"epoch": 9.12280701754386,
"grad_norm": 5.5674285888671875,
"learning_rate": 3.968253968253968e-06,
"loss": 0.2991,
"step": 260
},
{
"epoch": 9.473684210526315,
"grad_norm": 6.089910984039307,
"learning_rate": 1.984126984126984e-06,
"loss": 0.2943,
"step": 270
},
{
"epoch": 9.824561403508772,
"grad_norm": 5.232763290405273,
"learning_rate": 0.0,
"loss": 0.2728,
"step": 280
},
{
"epoch": 9.824561403508772,
"eval_accuracy": 0.8861386138613861,
"eval_loss": 0.308758020401001,
"eval_runtime": 4.3884,
"eval_samples_per_second": 92.061,
"eval_steps_per_second": 2.962,
"step": 280
},
{
"epoch": 9.824561403508772,
"step": 280,
"total_flos": 8.878060093031055e+17,
"train_loss": 0.5355421313217708,
"train_runtime": 1702.5623,
"train_samples_per_second": 21.344,
"train_steps_per_second": 0.164
}
],
"logging_steps": 10,
"max_steps": 280,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 8.878060093031055e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}