{ "best_metric": 0.8960396039603961, "best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-313", "epoch": 19.649122807017545, "eval_steps": 500, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35, "grad_norm": 10.917478561401367, "learning_rate": 8.92857142857143e-06, "loss": 1.8574, "step": 10 }, { "epoch": 0.7, "grad_norm": 7.904279708862305, "learning_rate": 1.785714285714286e-05, "loss": 1.6143, "step": 20 }, { "epoch": 0.98, "eval_accuracy": 0.5346534653465347, "eval_loss": 1.2882142066955566, "eval_runtime": 3.1902, "eval_samples_per_second": 126.638, "eval_steps_per_second": 4.075, "step": 28 }, { "epoch": 1.05, "grad_norm": 12.139655113220215, "learning_rate": 2.6785714285714288e-05, "loss": 1.3248, "step": 30 }, { "epoch": 1.4, "grad_norm": 6.75371789932251, "learning_rate": 3.571428571428572e-05, "loss": 1.1002, "step": 40 }, { "epoch": 1.75, "grad_norm": 16.99814224243164, "learning_rate": 4.464285714285715e-05, "loss": 0.8597, "step": 50 }, { "epoch": 2.0, "eval_accuracy": 0.7648514851485149, "eval_loss": 0.7302265763282776, "eval_runtime": 3.1986, "eval_samples_per_second": 126.304, "eval_steps_per_second": 4.064, "step": 57 }, { "epoch": 2.11, "grad_norm": 6.605724811553955, "learning_rate": 4.960317460317461e-05, "loss": 0.7264, "step": 60 }, { "epoch": 2.46, "grad_norm": 9.7291841506958, "learning_rate": 4.8611111111111115e-05, "loss": 0.6225, "step": 70 }, { "epoch": 2.81, "grad_norm": 7.020862579345703, "learning_rate": 4.761904761904762e-05, "loss": 0.5858, "step": 80 }, { "epoch": 2.98, "eval_accuracy": 0.8465346534653465, "eval_loss": 0.48486289381980896, "eval_runtime": 3.1164, "eval_samples_per_second": 129.637, "eval_steps_per_second": 4.171, "step": 85 }, { "epoch": 3.16, "grad_norm": 5.680540561676025, "learning_rate": 4.662698412698413e-05, "loss": 0.5214, "step": 90 }, { "epoch": 3.51, "grad_norm": 7.502087116241455, "learning_rate": 4.563492063492064e-05, "loss": 0.4695, "step": 100 }, { "epoch": 3.86, "grad_norm": 7.262702465057373, "learning_rate": 4.464285714285715e-05, "loss": 0.4332, "step": 110 }, { "epoch": 4.0, "eval_accuracy": 0.8613861386138614, "eval_loss": 0.4274178743362427, "eval_runtime": 3.1661, "eval_samples_per_second": 127.603, "eval_steps_per_second": 4.106, "step": 114 }, { "epoch": 4.21, "grad_norm": 6.890863418579102, "learning_rate": 4.3650793650793655e-05, "loss": 0.4297, "step": 120 }, { "epoch": 4.56, "grad_norm": 6.25393533706665, "learning_rate": 4.265873015873016e-05, "loss": 0.4149, "step": 130 }, { "epoch": 4.91, "grad_norm": 7.106054782867432, "learning_rate": 4.166666666666667e-05, "loss": 0.4054, "step": 140 }, { "epoch": 4.98, "eval_accuracy": 0.8787128712871287, "eval_loss": 0.3687044382095337, "eval_runtime": 3.1309, "eval_samples_per_second": 129.037, "eval_steps_per_second": 4.152, "step": 142 }, { "epoch": 5.26, "grad_norm": 6.924484729766846, "learning_rate": 4.067460317460318e-05, "loss": 0.3727, "step": 150 }, { "epoch": 5.61, "grad_norm": 5.540406703948975, "learning_rate": 3.968253968253968e-05, "loss": 0.3299, "step": 160 }, { "epoch": 5.96, "grad_norm": 6.109258651733398, "learning_rate": 3.8690476190476195e-05, "loss": 0.3826, "step": 170 }, { "epoch": 6.0, "eval_accuracy": 0.8613861386138614, "eval_loss": 0.37880900502204895, "eval_runtime": 3.1895, "eval_samples_per_second": 126.667, "eval_steps_per_second": 4.076, "step": 171 }, { "epoch": 6.32, "grad_norm": 4.945355415344238, "learning_rate": 3.76984126984127e-05, "loss": 0.3249, "step": 180 }, { "epoch": 6.67, "grad_norm": 4.232621669769287, "learning_rate": 3.6706349206349205e-05, "loss": 0.3561, "step": 190 }, { "epoch": 6.98, "eval_accuracy": 0.8935643564356436, "eval_loss": 0.36995938420295715, "eval_runtime": 3.1439, "eval_samples_per_second": 128.503, "eval_steps_per_second": 4.135, "step": 199 }, { "epoch": 7.02, "grad_norm": 5.742563247680664, "learning_rate": 3.571428571428572e-05, "loss": 0.3097, "step": 200 }, { "epoch": 7.37, "grad_norm": 6.4120402336120605, "learning_rate": 3.472222222222222e-05, "loss": 0.3167, "step": 210 }, { "epoch": 7.72, "grad_norm": 6.971658229827881, "learning_rate": 3.3730158730158734e-05, "loss": 0.2838, "step": 220 }, { "epoch": 8.0, "eval_accuracy": 0.8811881188118812, "eval_loss": 0.3550195097923279, "eval_runtime": 3.0809, "eval_samples_per_second": 131.129, "eval_steps_per_second": 4.22, "step": 228 }, { "epoch": 8.07, "grad_norm": 6.908318042755127, "learning_rate": 3.273809523809524e-05, "loss": 0.2828, "step": 230 }, { "epoch": 8.42, "grad_norm": 5.122198104858398, "learning_rate": 3.1746031746031745e-05, "loss": 0.267, "step": 240 }, { "epoch": 8.77, "grad_norm": 4.9148077964782715, "learning_rate": 3.075396825396826e-05, "loss": 0.2897, "step": 250 }, { "epoch": 8.98, "eval_accuracy": 0.8886138613861386, "eval_loss": 0.3698354661464691, "eval_runtime": 3.2226, "eval_samples_per_second": 125.366, "eval_steps_per_second": 4.034, "step": 256 }, { "epoch": 9.12, "grad_norm": 6.60095739364624, "learning_rate": 2.9761904761904762e-05, "loss": 0.3077, "step": 260 }, { "epoch": 9.47, "grad_norm": 5.082594394683838, "learning_rate": 2.876984126984127e-05, "loss": 0.245, "step": 270 }, { "epoch": 9.82, "grad_norm": 3.717313766479492, "learning_rate": 2.777777777777778e-05, "loss": 0.2519, "step": 280 }, { "epoch": 10.0, "eval_accuracy": 0.8836633663366337, "eval_loss": 0.34588712453842163, "eval_runtime": 3.2343, "eval_samples_per_second": 124.913, "eval_steps_per_second": 4.019, "step": 285 }, { "epoch": 10.18, "grad_norm": 4.068792819976807, "learning_rate": 2.6785714285714288e-05, "loss": 0.2456, "step": 290 }, { "epoch": 10.53, "grad_norm": 4.915719509124756, "learning_rate": 2.5793650793650796e-05, "loss": 0.2599, "step": 300 }, { "epoch": 10.88, "grad_norm": 4.958366870880127, "learning_rate": 2.4801587301587305e-05, "loss": 0.2194, "step": 310 }, { "epoch": 10.98, "eval_accuracy": 0.8960396039603961, "eval_loss": 0.3583451807498932, "eval_runtime": 3.1613, "eval_samples_per_second": 127.797, "eval_steps_per_second": 4.112, "step": 313 }, { "epoch": 11.23, "grad_norm": 8.63839054107666, "learning_rate": 2.380952380952381e-05, "loss": 0.2206, "step": 320 }, { "epoch": 11.58, "grad_norm": 6.567727565765381, "learning_rate": 2.281746031746032e-05, "loss": 0.22, "step": 330 }, { "epoch": 11.93, "grad_norm": 5.235540866851807, "learning_rate": 2.1825396825396827e-05, "loss": 0.1955, "step": 340 }, { "epoch": 12.0, "eval_accuracy": 0.8886138613861386, "eval_loss": 0.3442213535308838, "eval_runtime": 3.1157, "eval_samples_per_second": 129.666, "eval_steps_per_second": 4.172, "step": 342 }, { "epoch": 12.28, "grad_norm": 5.771058559417725, "learning_rate": 2.0833333333333336e-05, "loss": 0.1896, "step": 350 }, { "epoch": 12.63, "grad_norm": 4.845823287963867, "learning_rate": 1.984126984126984e-05, "loss": 0.1914, "step": 360 }, { "epoch": 12.98, "grad_norm": 6.198949813842773, "learning_rate": 1.884920634920635e-05, "loss": 0.2443, "step": 370 }, { "epoch": 12.98, "eval_accuracy": 0.8787128712871287, "eval_loss": 0.38009530305862427, "eval_runtime": 3.1836, "eval_samples_per_second": 126.902, "eval_steps_per_second": 4.083, "step": 370 }, { "epoch": 13.33, "grad_norm": 5.5330023765563965, "learning_rate": 1.785714285714286e-05, "loss": 0.1938, "step": 380 }, { "epoch": 13.68, "grad_norm": 6.819355487823486, "learning_rate": 1.6865079365079367e-05, "loss": 0.207, "step": 390 }, { "epoch": 14.0, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.34993720054626465, "eval_runtime": 3.1367, "eval_samples_per_second": 128.798, "eval_steps_per_second": 4.144, "step": 399 }, { "epoch": 14.04, "grad_norm": 3.808520555496216, "learning_rate": 1.5873015873015872e-05, "loss": 0.2176, "step": 400 }, { "epoch": 14.39, "grad_norm": 4.6104631423950195, "learning_rate": 1.4880952380952381e-05, "loss": 0.1725, "step": 410 }, { "epoch": 14.74, "grad_norm": 7.437196731567383, "learning_rate": 1.388888888888889e-05, "loss": 0.2078, "step": 420 }, { "epoch": 14.98, "eval_accuracy": 0.8836633663366337, "eval_loss": 0.3701125383377075, "eval_runtime": 3.2627, "eval_samples_per_second": 123.824, "eval_steps_per_second": 3.984, "step": 427 }, { "epoch": 15.09, "grad_norm": 4.747229099273682, "learning_rate": 1.2896825396825398e-05, "loss": 0.1775, "step": 430 }, { "epoch": 15.44, "grad_norm": 5.044639587402344, "learning_rate": 1.1904761904761905e-05, "loss": 0.1731, "step": 440 }, { "epoch": 15.79, "grad_norm": 5.209843635559082, "learning_rate": 1.0912698412698414e-05, "loss": 0.1873, "step": 450 }, { "epoch": 16.0, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.37732651829719543, "eval_runtime": 3.1869, "eval_samples_per_second": 126.767, "eval_steps_per_second": 4.079, "step": 456 }, { "epoch": 16.14, "grad_norm": 3.205880880355835, "learning_rate": 9.92063492063492e-06, "loss": 0.1728, "step": 460 }, { "epoch": 16.49, "grad_norm": 5.715033054351807, "learning_rate": 8.92857142857143e-06, "loss": 0.1746, "step": 470 }, { "epoch": 16.84, "grad_norm": 3.9077258110046387, "learning_rate": 7.936507936507936e-06, "loss": 0.1697, "step": 480 }, { "epoch": 16.98, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.37531015276908875, "eval_runtime": 3.1372, "eval_samples_per_second": 128.778, "eval_steps_per_second": 4.144, "step": 484 }, { "epoch": 17.19, "grad_norm": 6.2585344314575195, "learning_rate": 6.944444444444445e-06, "loss": 0.1774, "step": 490 }, { "epoch": 17.54, "grad_norm": 4.743368625640869, "learning_rate": 5.9523809523809525e-06, "loss": 0.1602, "step": 500 }, { "epoch": 17.89, "grad_norm": 5.987907409667969, "learning_rate": 4.96031746031746e-06, "loss": 0.1812, "step": 510 }, { "epoch": 18.0, "eval_accuracy": 0.8910891089108911, "eval_loss": 0.3747256398200989, "eval_runtime": 3.3841, "eval_samples_per_second": 119.38, "eval_steps_per_second": 3.841, "step": 513 }, { "epoch": 18.25, "grad_norm": 4.54545259475708, "learning_rate": 3.968253968253968e-06, "loss": 0.1375, "step": 520 }, { "epoch": 18.6, "grad_norm": 5.2062249183654785, "learning_rate": 2.9761904761904763e-06, "loss": 0.1713, "step": 530 }, { "epoch": 18.95, "grad_norm": 5.484841823577881, "learning_rate": 1.984126984126984e-06, "loss": 0.151, "step": 540 }, { "epoch": 18.98, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.3735988438129425, "eval_runtime": 3.2943, "eval_samples_per_second": 122.636, "eval_steps_per_second": 3.946, "step": 541 }, { "epoch": 19.3, "grad_norm": 5.083439350128174, "learning_rate": 9.92063492063492e-07, "loss": 0.163, "step": 550 }, { "epoch": 19.65, "grad_norm": 6.915902137756348, "learning_rate": 0.0, "loss": 0.1567, "step": 560 }, { "epoch": 19.65, "eval_accuracy": 0.8861386138613861, "eval_loss": 0.3726496696472168, "eval_runtime": 3.3611, "eval_samples_per_second": 120.199, "eval_steps_per_second": 3.868, "step": 560 }, { "epoch": 19.65, "step": 560, "total_flos": 1.7752639958663823e+18, "train_loss": 0.37542668517146793, "train_runtime": 934.7322, "train_samples_per_second": 77.755, "train_steps_per_second": 0.599 } ], "logging_steps": 10, "max_steps": 560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.7752639958663823e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }