|
{ |
|
"best_metric": 0.8787128712871287, |
|
"best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-199", |
|
"epoch": 19.649122807017545, |
|
"eval_steps": 500, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.11484432220459, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 1.726, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.320251941680908, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.5532, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.6163366336633663, |
|
"eval_loss": 1.170404314994812, |
|
"eval_runtime": 5.3474, |
|
"eval_samples_per_second": 75.551, |
|
"eval_steps_per_second": 2.431, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.363418102264404, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 1.2686, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 6.138254165649414, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.0232, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 7.400917053222656, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.8115, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7673267326732673, |
|
"eval_loss": 0.6826810836791992, |
|
"eval_runtime": 3.1989, |
|
"eval_samples_per_second": 126.295, |
|
"eval_steps_per_second": 4.064, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 7.412561893463135, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 0.6904, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 8.475811004638672, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.5908, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 32.85942077636719, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.5513, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.8415841584158416, |
|
"eval_loss": 0.4524537920951843, |
|
"eval_runtime": 3.2851, |
|
"eval_samples_per_second": 122.978, |
|
"eval_steps_per_second": 3.957, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 5.583408355712891, |
|
"learning_rate": 4.662698412698413e-05, |
|
"loss": 0.5107, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 7.957437038421631, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 0.4767, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"grad_norm": 6.342191219329834, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.455, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8539603960396039, |
|
"eval_loss": 0.40117621421813965, |
|
"eval_runtime": 3.1407, |
|
"eval_samples_per_second": 128.633, |
|
"eval_steps_per_second": 4.139, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 5.71556282043457, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.4405, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"grad_norm": 6.5584306716918945, |
|
"learning_rate": 4.265873015873016e-05, |
|
"loss": 0.4179, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"grad_norm": 6.672788143157959, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.3901, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 0.8613861386138614, |
|
"eval_loss": 0.38241395354270935, |
|
"eval_runtime": 3.2535, |
|
"eval_samples_per_second": 124.173, |
|
"eval_steps_per_second": 3.996, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"grad_norm": 8.260834693908691, |
|
"learning_rate": 4.067460317460318e-05, |
|
"loss": 0.3765, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"grad_norm": 9.854930877685547, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.3419, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 8.068675994873047, |
|
"learning_rate": 3.8690476190476195e-05, |
|
"loss": 0.4042, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8638613861386139, |
|
"eval_loss": 0.37970858812332153, |
|
"eval_runtime": 3.1263, |
|
"eval_samples_per_second": 129.226, |
|
"eval_steps_per_second": 4.158, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 5.019181728363037, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 0.3335, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 5.983124256134033, |
|
"learning_rate": 3.6706349206349205e-05, |
|
"loss": 0.3591, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_accuracy": 0.8787128712871287, |
|
"eval_loss": 0.3504551351070404, |
|
"eval_runtime": 3.3207, |
|
"eval_samples_per_second": 121.661, |
|
"eval_steps_per_second": 3.915, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 5.728125095367432, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.3308, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 6.73638916015625, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.3228, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"grad_norm": 9.998391151428223, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 0.2989, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8613861386138614, |
|
"eval_loss": 0.355110764503479, |
|
"eval_runtime": 3.1811, |
|
"eval_samples_per_second": 127.002, |
|
"eval_steps_per_second": 4.087, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 6.521678924560547, |
|
"learning_rate": 3.273809523809524e-05, |
|
"loss": 0.2857, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"grad_norm": 6.126306056976318, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.2748, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"grad_norm": 7.768061637878418, |
|
"learning_rate": 3.075396825396826e-05, |
|
"loss": 0.3029, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.8663366336633663, |
|
"eval_loss": 0.36247390508651733, |
|
"eval_runtime": 3.1822, |
|
"eval_samples_per_second": 126.958, |
|
"eval_steps_per_second": 4.085, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"grad_norm": 6.541659355163574, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 0.3138, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"grad_norm": 4.587341785430908, |
|
"learning_rate": 2.876984126984127e-05, |
|
"loss": 0.2556, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 3.2555856704711914, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2606, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.849009900990099, |
|
"eval_loss": 0.3615105450153351, |
|
"eval_runtime": 3.1469, |
|
"eval_samples_per_second": 128.38, |
|
"eval_steps_per_second": 4.131, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"grad_norm": 5.79010534286499, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 0.2589, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"grad_norm": 7.236560821533203, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 0.2615, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"grad_norm": 6.618343353271484, |
|
"learning_rate": 2.4801587301587305e-05, |
|
"loss": 0.2413, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_accuracy": 0.8787128712871287, |
|
"eval_loss": 0.34345176815986633, |
|
"eval_runtime": 3.2537, |
|
"eval_samples_per_second": 124.168, |
|
"eval_steps_per_second": 3.996, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"grad_norm": 7.067264556884766, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.2329, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"grad_norm": 4.156674861907959, |
|
"learning_rate": 2.281746031746032e-05, |
|
"loss": 0.2286, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"grad_norm": 6.207752227783203, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 0.2051, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8663366336633663, |
|
"eval_loss": 0.3371087908744812, |
|
"eval_runtime": 3.105, |
|
"eval_samples_per_second": 130.112, |
|
"eval_steps_per_second": 4.187, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"grad_norm": 4.795997142791748, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.2052, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"grad_norm": 5.227004528045654, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.1962, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"grad_norm": 6.650285720825195, |
|
"learning_rate": 1.884920634920635e-05, |
|
"loss": 0.2477, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_accuracy": 0.8638613861386139, |
|
"eval_loss": 0.34505516290664673, |
|
"eval_runtime": 3.309, |
|
"eval_samples_per_second": 122.091, |
|
"eval_steps_per_second": 3.929, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"grad_norm": 4.851602554321289, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.2151, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"grad_norm": 6.44926118850708, |
|
"learning_rate": 1.6865079365079367e-05, |
|
"loss": 0.2271, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8737623762376238, |
|
"eval_loss": 0.3363986909389496, |
|
"eval_runtime": 3.1022, |
|
"eval_samples_per_second": 130.23, |
|
"eval_steps_per_second": 4.191, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"grad_norm": 4.615256309509277, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.2164, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"grad_norm": 4.279232025146484, |
|
"learning_rate": 1.4880952380952381e-05, |
|
"loss": 0.1937, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"grad_norm": 5.792173385620117, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2112, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_accuracy": 0.8638613861386139, |
|
"eval_loss": 0.35587263107299805, |
|
"eval_runtime": 3.6554, |
|
"eval_samples_per_second": 110.522, |
|
"eval_steps_per_second": 3.556, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"grad_norm": 4.082373142242432, |
|
"learning_rate": 1.2896825396825398e-05, |
|
"loss": 0.1815, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"grad_norm": 6.473873138427734, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.1884, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"grad_norm": 3.9221067428588867, |
|
"learning_rate": 1.0912698412698414e-05, |
|
"loss": 0.1902, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8737623762376238, |
|
"eval_loss": 0.36296331882476807, |
|
"eval_runtime": 3.2193, |
|
"eval_samples_per_second": 125.494, |
|
"eval_steps_per_second": 4.038, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"grad_norm": 3.29888916015625, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.1834, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"grad_norm": 5.7562432289123535, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.1797, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"grad_norm": 4.14888858795166, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.1739, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_accuracy": 0.8712871287128713, |
|
"eval_loss": 0.36299142241477966, |
|
"eval_runtime": 3.2007, |
|
"eval_samples_per_second": 126.223, |
|
"eval_steps_per_second": 4.062, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"grad_norm": 5.445935249328613, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1723, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"grad_norm": 10.137904167175293, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.1721, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"grad_norm": 6.753917217254639, |
|
"learning_rate": 4.96031746031746e-06, |
|
"loss": 0.195, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8663366336633663, |
|
"eval_loss": 0.3625456690788269, |
|
"eval_runtime": 3.1415, |
|
"eval_samples_per_second": 128.599, |
|
"eval_steps_per_second": 4.138, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"grad_norm": 5.913514614105225, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.138, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"grad_norm": 5.631173610687256, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"loss": 0.1784, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"grad_norm": 3.3359873294830322, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.1621, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_accuracy": 0.8762376237623762, |
|
"eval_loss": 0.35711079835891724, |
|
"eval_runtime": 3.3137, |
|
"eval_samples_per_second": 121.916, |
|
"eval_steps_per_second": 3.923, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 19.3, |
|
"grad_norm": 4.744650363922119, |
|
"learning_rate": 9.92063492063492e-07, |
|
"loss": 0.1726, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"grad_norm": 5.039443492889404, |
|
"learning_rate": 0.0, |
|
"loss": 0.154, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"eval_accuracy": 0.8737623762376238, |
|
"eval_loss": 0.3554615080356598, |
|
"eval_runtime": 3.3898, |
|
"eval_samples_per_second": 119.18, |
|
"eval_steps_per_second": 3.835, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"step": 560, |
|
"total_flos": 1.7752639958663823e+18, |
|
"train_loss": 0.3740995813693319, |
|
"train_runtime": 1232.3243, |
|
"train_samples_per_second": 58.978, |
|
"train_steps_per_second": 0.454 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.7752639958663823e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|