|
{ |
|
"best_metric": 0.8960396039603961, |
|
"best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-313", |
|
"epoch": 19.649122807017545, |
|
"eval_steps": 500, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 10.917478561401367, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 1.8574, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 7.904279708862305, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 1.6143, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5346534653465347, |
|
"eval_loss": 1.2882142066955566, |
|
"eval_runtime": 3.1902, |
|
"eval_samples_per_second": 126.638, |
|
"eval_steps_per_second": 4.075, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 12.139655113220215, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 1.3248, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 6.75371789932251, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.1002, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 16.99814224243164, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.8597, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7648514851485149, |
|
"eval_loss": 0.7302265763282776, |
|
"eval_runtime": 3.1986, |
|
"eval_samples_per_second": 126.304, |
|
"eval_steps_per_second": 4.064, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 6.605724811553955, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 0.7264, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 9.7291841506958, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.6225, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 7.020862579345703, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.5858, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.8465346534653465, |
|
"eval_loss": 0.48486289381980896, |
|
"eval_runtime": 3.1164, |
|
"eval_samples_per_second": 129.637, |
|
"eval_steps_per_second": 4.171, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 5.680540561676025, |
|
"learning_rate": 4.662698412698413e-05, |
|
"loss": 0.5214, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 7.502087116241455, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 0.4695, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"grad_norm": 7.262702465057373, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.4332, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8613861386138614, |
|
"eval_loss": 0.4274178743362427, |
|
"eval_runtime": 3.1661, |
|
"eval_samples_per_second": 127.603, |
|
"eval_steps_per_second": 4.106, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 6.890863418579102, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.4297, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"grad_norm": 6.25393533706665, |
|
"learning_rate": 4.265873015873016e-05, |
|
"loss": 0.4149, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"grad_norm": 7.106054782867432, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4054, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 0.8787128712871287, |
|
"eval_loss": 0.3687044382095337, |
|
"eval_runtime": 3.1309, |
|
"eval_samples_per_second": 129.037, |
|
"eval_steps_per_second": 4.152, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"grad_norm": 6.924484729766846, |
|
"learning_rate": 4.067460317460318e-05, |
|
"loss": 0.3727, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"grad_norm": 5.540406703948975, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.3299, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 6.109258651733398, |
|
"learning_rate": 3.8690476190476195e-05, |
|
"loss": 0.3826, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8613861386138614, |
|
"eval_loss": 0.37880900502204895, |
|
"eval_runtime": 3.1895, |
|
"eval_samples_per_second": 126.667, |
|
"eval_steps_per_second": 4.076, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 4.945355415344238, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 0.3249, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 4.232621669769287, |
|
"learning_rate": 3.6706349206349205e-05, |
|
"loss": 0.3561, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_accuracy": 0.8935643564356436, |
|
"eval_loss": 0.36995938420295715, |
|
"eval_runtime": 3.1439, |
|
"eval_samples_per_second": 128.503, |
|
"eval_steps_per_second": 4.135, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 5.742563247680664, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.3097, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 6.4120402336120605, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.3167, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"grad_norm": 6.971658229827881, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 0.2838, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8811881188118812, |
|
"eval_loss": 0.3550195097923279, |
|
"eval_runtime": 3.0809, |
|
"eval_samples_per_second": 131.129, |
|
"eval_steps_per_second": 4.22, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 6.908318042755127, |
|
"learning_rate": 3.273809523809524e-05, |
|
"loss": 0.2828, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"grad_norm": 5.122198104858398, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.267, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"grad_norm": 4.9148077964782715, |
|
"learning_rate": 3.075396825396826e-05, |
|
"loss": 0.2897, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.8886138613861386, |
|
"eval_loss": 0.3698354661464691, |
|
"eval_runtime": 3.2226, |
|
"eval_samples_per_second": 125.366, |
|
"eval_steps_per_second": 4.034, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"grad_norm": 6.60095739364624, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 0.3077, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"grad_norm": 5.082594394683838, |
|
"learning_rate": 2.876984126984127e-05, |
|
"loss": 0.245, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 3.717313766479492, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2519, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8836633663366337, |
|
"eval_loss": 0.34588712453842163, |
|
"eval_runtime": 3.2343, |
|
"eval_samples_per_second": 124.913, |
|
"eval_steps_per_second": 4.019, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"grad_norm": 4.068792819976807, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 0.2456, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"grad_norm": 4.915719509124756, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 0.2599, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"grad_norm": 4.958366870880127, |
|
"learning_rate": 2.4801587301587305e-05, |
|
"loss": 0.2194, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_accuracy": 0.8960396039603961, |
|
"eval_loss": 0.3583451807498932, |
|
"eval_runtime": 3.1613, |
|
"eval_samples_per_second": 127.797, |
|
"eval_steps_per_second": 4.112, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"grad_norm": 8.63839054107666, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.2206, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"grad_norm": 6.567727565765381, |
|
"learning_rate": 2.281746031746032e-05, |
|
"loss": 0.22, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"grad_norm": 5.235540866851807, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 0.1955, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8886138613861386, |
|
"eval_loss": 0.3442213535308838, |
|
"eval_runtime": 3.1157, |
|
"eval_samples_per_second": 129.666, |
|
"eval_steps_per_second": 4.172, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"grad_norm": 5.771058559417725, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.1896, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"grad_norm": 4.845823287963867, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.1914, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"grad_norm": 6.198949813842773, |
|
"learning_rate": 1.884920634920635e-05, |
|
"loss": 0.2443, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_accuracy": 0.8787128712871287, |
|
"eval_loss": 0.38009530305862427, |
|
"eval_runtime": 3.1836, |
|
"eval_samples_per_second": 126.902, |
|
"eval_steps_per_second": 4.083, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"grad_norm": 5.5330023765563965, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.1938, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"grad_norm": 6.819355487823486, |
|
"learning_rate": 1.6865079365079367e-05, |
|
"loss": 0.207, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.34993720054626465, |
|
"eval_runtime": 3.1367, |
|
"eval_samples_per_second": 128.798, |
|
"eval_steps_per_second": 4.144, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"grad_norm": 3.808520555496216, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.2176, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"grad_norm": 4.6104631423950195, |
|
"learning_rate": 1.4880952380952381e-05, |
|
"loss": 0.1725, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"grad_norm": 7.437196731567383, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2078, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_accuracy": 0.8836633663366337, |
|
"eval_loss": 0.3701125383377075, |
|
"eval_runtime": 3.2627, |
|
"eval_samples_per_second": 123.824, |
|
"eval_steps_per_second": 3.984, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"grad_norm": 4.747229099273682, |
|
"learning_rate": 1.2896825396825398e-05, |
|
"loss": 0.1775, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"grad_norm": 5.044639587402344, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.1731, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"grad_norm": 5.209843635559082, |
|
"learning_rate": 1.0912698412698414e-05, |
|
"loss": 0.1873, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.37732651829719543, |
|
"eval_runtime": 3.1869, |
|
"eval_samples_per_second": 126.767, |
|
"eval_steps_per_second": 4.079, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"grad_norm": 3.205880880355835, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.1728, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"grad_norm": 5.715033054351807, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.1746, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"grad_norm": 3.9077258110046387, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.1697, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.37531015276908875, |
|
"eval_runtime": 3.1372, |
|
"eval_samples_per_second": 128.778, |
|
"eval_steps_per_second": 4.144, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"grad_norm": 6.2585344314575195, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1774, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"grad_norm": 4.743368625640869, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.1602, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"grad_norm": 5.987907409667969, |
|
"learning_rate": 4.96031746031746e-06, |
|
"loss": 0.1812, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8910891089108911, |
|
"eval_loss": 0.3747256398200989, |
|
"eval_runtime": 3.3841, |
|
"eval_samples_per_second": 119.38, |
|
"eval_steps_per_second": 3.841, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"grad_norm": 4.54545259475708, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.1375, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"grad_norm": 5.2062249183654785, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"loss": 0.1713, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"grad_norm": 5.484841823577881, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.151, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.3735988438129425, |
|
"eval_runtime": 3.2943, |
|
"eval_samples_per_second": 122.636, |
|
"eval_steps_per_second": 3.946, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 19.3, |
|
"grad_norm": 5.083439350128174, |
|
"learning_rate": 9.92063492063492e-07, |
|
"loss": 0.163, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"grad_norm": 6.915902137756348, |
|
"learning_rate": 0.0, |
|
"loss": 0.1567, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.3726496696472168, |
|
"eval_runtime": 3.3611, |
|
"eval_samples_per_second": 120.199, |
|
"eval_steps_per_second": 3.868, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"step": 560, |
|
"total_flos": 1.7752639958663823e+18, |
|
"train_loss": 0.37542668517146793, |
|
"train_runtime": 934.7322, |
|
"train_samples_per_second": 77.755, |
|
"train_steps_per_second": 0.599 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.7752639958663823e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|