{ "best_metric": null, "best_model_checkpoint": null, "epoch": 72.99270072992701, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.82, "learning_rate": 4.875e-05, "loss": 10.1798, "step": 250 }, { "epoch": 1.82, "eval_test_accuracy": 0.0, "eval_test_loss": 3.4965455532073975, "eval_test_runtime": 7.5045, "eval_test_samples_per_second": 162.702, "eval_test_steps_per_second": 2.665, "step": 250 }, { "epoch": 3.65, "learning_rate": 4.75e-05, "loss": 4.0133, "step": 500 }, { "epoch": 3.65, "eval_test_accuracy": 0.085995085995086, "eval_test_loss": 2.1827144622802734, "eval_test_runtime": 11.227, "eval_test_samples_per_second": 108.755, "eval_test_steps_per_second": 1.781, "step": 500 }, { "epoch": 5.47, "learning_rate": 4.6250000000000006e-05, "loss": 2.1831, "step": 750 }, { "epoch": 5.47, "eval_test_accuracy": 0.28992628992628994, "eval_test_loss": 1.1154128313064575, "eval_test_runtime": 4.9154, "eval_test_samples_per_second": 248.402, "eval_test_steps_per_second": 4.069, "step": 750 }, { "epoch": 7.3, "learning_rate": 4.5e-05, "loss": 1.178, "step": 1000 }, { "epoch": 7.3, "eval_test_accuracy": 0.4430794430794431, "eval_test_loss": 0.7581946849822998, "eval_test_runtime": 5.2157, "eval_test_samples_per_second": 234.1, "eval_test_steps_per_second": 3.835, "step": 1000 }, { "epoch": 9.12, "learning_rate": 4.375e-05, "loss": 0.8603, "step": 1250 }, { "epoch": 9.12, "eval_test_accuracy": 0.506961506961507, "eval_test_loss": 0.6857301592826843, "eval_test_runtime": 4.1605, "eval_test_samples_per_second": 293.474, "eval_test_steps_per_second": 4.807, "step": 1250 }, { "epoch": 10.95, "learning_rate": 4.25e-05, "loss": 0.7179, "step": 1500 }, { "epoch": 10.95, "eval_test_accuracy": 0.5356265356265356, "eval_test_loss": 0.629558801651001, "eval_test_runtime": 4.1543, "eval_test_samples_per_second": 293.909, "eval_test_steps_per_second": 4.814, "step": 1500 }, { "epoch": 12.77, "learning_rate": 4.125e-05, "loss": 0.6347, "step": 1750 }, { "epoch": 12.77, "eval_test_accuracy": 0.556920556920557, "eval_test_loss": 0.6828880310058594, "eval_test_runtime": 4.1527, "eval_test_samples_per_second": 294.026, "eval_test_steps_per_second": 4.816, "step": 1750 }, { "epoch": 14.6, "learning_rate": 4e-05, "loss": 0.5714, "step": 2000 }, { "epoch": 14.6, "eval_test_accuracy": 0.5683865683865684, "eval_test_loss": 0.6402557492256165, "eval_test_runtime": 4.1126, "eval_test_samples_per_second": 296.89, "eval_test_steps_per_second": 4.863, "step": 2000 }, { "epoch": 16.42, "learning_rate": 3.875e-05, "loss": 0.535, "step": 2250 }, { "epoch": 16.42, "eval_test_accuracy": 0.5823095823095823, "eval_test_loss": 0.6427932381629944, "eval_test_runtime": 4.1425, "eval_test_samples_per_second": 294.751, "eval_test_steps_per_second": 4.828, "step": 2250 }, { "epoch": 18.25, "learning_rate": 3.7500000000000003e-05, "loss": 0.4864, "step": 2500 }, { "epoch": 18.25, "eval_test_accuracy": 0.5749385749385749, "eval_test_loss": 0.6692995429039001, "eval_test_runtime": 4.1218, "eval_test_samples_per_second": 296.233, "eval_test_steps_per_second": 4.852, "step": 2500 }, { "epoch": 20.07, "learning_rate": 3.625e-05, "loss": 0.4523, "step": 2750 }, { "epoch": 20.07, "eval_test_accuracy": 0.588042588042588, "eval_test_loss": 0.6854296326637268, "eval_test_runtime": 4.1256, "eval_test_samples_per_second": 295.954, "eval_test_steps_per_second": 4.848, "step": 2750 }, { "epoch": 21.9, "learning_rate": 3.5e-05, "loss": 0.4267, "step": 3000 }, { "epoch": 21.9, "eval_test_accuracy": 0.5847665847665847, "eval_test_loss": 0.6832742691040039, "eval_test_runtime": 4.114, "eval_test_samples_per_second": 296.79, "eval_test_steps_per_second": 4.861, "step": 3000 }, { "epoch": 23.72, "learning_rate": 3.375000000000001e-05, "loss": 0.4017, "step": 3250 }, { "epoch": 23.72, "eval_test_accuracy": 0.5864045864045864, "eval_test_loss": 0.7026733756065369, "eval_test_runtime": 4.162, "eval_test_samples_per_second": 293.366, "eval_test_steps_per_second": 4.805, "step": 3250 }, { "epoch": 25.55, "learning_rate": 3.2500000000000004e-05, "loss": 0.3737, "step": 3500 }, { "epoch": 25.55, "eval_test_accuracy": 0.5823095823095823, "eval_test_loss": 0.7358095645904541, "eval_test_runtime": 4.1139, "eval_test_samples_per_second": 296.797, "eval_test_steps_per_second": 4.862, "step": 3500 }, { "epoch": 27.37, "learning_rate": 3.125e-05, "loss": 0.3567, "step": 3750 }, { "epoch": 27.37, "eval_test_accuracy": 0.583947583947584, "eval_test_loss": 0.7573221921920776, "eval_test_runtime": 4.1462, "eval_test_samples_per_second": 294.489, "eval_test_steps_per_second": 4.824, "step": 3750 }, { "epoch": 29.2, "learning_rate": 3e-05, "loss": 0.3329, "step": 4000 }, { "epoch": 29.2, "eval_test_accuracy": 0.5831285831285832, "eval_test_loss": 0.7671645283699036, "eval_test_runtime": 4.1876, "eval_test_samples_per_second": 291.577, "eval_test_steps_per_second": 4.776, "step": 4000 }, { "epoch": 31.02, "learning_rate": 2.8749999999999997e-05, "loss": 0.3178, "step": 4250 }, { "epoch": 31.02, "eval_test_accuracy": 0.5937755937755937, "eval_test_loss": 0.8280954360961914, "eval_test_runtime": 4.1401, "eval_test_samples_per_second": 294.919, "eval_test_steps_per_second": 4.831, "step": 4250 }, { "epoch": 32.85, "learning_rate": 2.7500000000000004e-05, "loss": 0.3031, "step": 4500 }, { "epoch": 32.85, "eval_test_accuracy": 0.5954135954135954, "eval_test_loss": 0.8298905491828918, "eval_test_runtime": 4.1172, "eval_test_samples_per_second": 296.557, "eval_test_steps_per_second": 4.858, "step": 4500 }, { "epoch": 34.67, "learning_rate": 2.625e-05, "loss": 0.2942, "step": 4750 }, { "epoch": 34.67, "eval_test_accuracy": 0.592956592956593, "eval_test_loss": 0.8406508564949036, "eval_test_runtime": 4.1428, "eval_test_samples_per_second": 294.726, "eval_test_steps_per_second": 4.828, "step": 4750 }, { "epoch": 36.5, "learning_rate": 2.5e-05, "loss": 0.2794, "step": 5000 }, { "epoch": 36.5, "eval_test_accuracy": 0.6003276003276004, "eval_test_loss": 0.8442530035972595, "eval_test_runtime": 4.3235, "eval_test_samples_per_second": 282.409, "eval_test_steps_per_second": 4.626, "step": 5000 }, { "epoch": 38.32, "learning_rate": 2.375e-05, "loss": 0.2733, "step": 5250 }, { "epoch": 38.32, "eval_test_accuracy": 0.6052416052416052, "eval_test_loss": 0.8638033270835876, "eval_test_runtime": 4.1266, "eval_test_samples_per_second": 295.887, "eval_test_steps_per_second": 4.847, "step": 5250 }, { "epoch": 40.15, "learning_rate": 2.25e-05, "loss": 0.2631, "step": 5500 }, { "epoch": 40.15, "eval_test_accuracy": 0.5888615888615889, "eval_test_loss": 0.890779435634613, "eval_test_runtime": 4.1284, "eval_test_samples_per_second": 295.759, "eval_test_steps_per_second": 4.845, "step": 5500 }, { "epoch": 41.97, "learning_rate": 2.125e-05, "loss": 0.2574, "step": 5750 }, { "epoch": 41.97, "eval_test_accuracy": 0.588042588042588, "eval_test_loss": 0.9194920063018799, "eval_test_runtime": 4.2329, "eval_test_samples_per_second": 288.451, "eval_test_steps_per_second": 4.725, "step": 5750 }, { "epoch": 43.8, "learning_rate": 2e-05, "loss": 0.2445, "step": 6000 }, { "epoch": 43.8, "eval_test_accuracy": 0.5913185913185913, "eval_test_loss": 0.9236257672309875, "eval_test_runtime": 4.1684, "eval_test_samples_per_second": 292.916, "eval_test_steps_per_second": 4.798, "step": 6000 }, { "epoch": 45.62, "learning_rate": 1.8750000000000002e-05, "loss": 0.2417, "step": 6250 }, { "epoch": 45.62, "eval_test_accuracy": 0.5913185913185913, "eval_test_loss": 0.9303093552589417, "eval_test_runtime": 4.1896, "eval_test_samples_per_second": 291.435, "eval_test_steps_per_second": 4.774, "step": 6250 }, { "epoch": 47.45, "learning_rate": 1.75e-05, "loss": 0.2316, "step": 6500 }, { "epoch": 47.45, "eval_test_accuracy": 0.6060606060606061, "eval_test_loss": 0.9456475377082825, "eval_test_runtime": 4.1609, "eval_test_samples_per_second": 293.446, "eval_test_steps_per_second": 4.807, "step": 6500 }, { "epoch": 49.27, "learning_rate": 1.6250000000000002e-05, "loss": 0.227, "step": 6750 }, { "epoch": 49.27, "eval_test_accuracy": 0.5978705978705978, "eval_test_loss": 0.9745798110961914, "eval_test_runtime": 4.1394, "eval_test_samples_per_second": 294.971, "eval_test_steps_per_second": 4.832, "step": 6750 }, { "epoch": 51.09, "learning_rate": 1.5e-05, "loss": 0.2241, "step": 7000 }, { "epoch": 51.09, "eval_test_accuracy": 0.6052416052416052, "eval_test_loss": 0.938654899597168, "eval_test_runtime": 4.1652, "eval_test_samples_per_second": 293.143, "eval_test_steps_per_second": 4.802, "step": 7000 }, { "epoch": 52.92, "learning_rate": 1.3750000000000002e-05, "loss": 0.2174, "step": 7250 }, { "epoch": 52.92, "eval_test_accuracy": 0.5986895986895987, "eval_test_loss": 0.9762380719184875, "eval_test_runtime": 4.2021, "eval_test_samples_per_second": 290.57, "eval_test_steps_per_second": 4.76, "step": 7250 }, { "epoch": 54.74, "learning_rate": 1.25e-05, "loss": 0.212, "step": 7500 }, { "epoch": 54.74, "eval_test_accuracy": 0.601965601965602, "eval_test_loss": 0.9834132194519043, "eval_test_runtime": 4.1906, "eval_test_samples_per_second": 291.369, "eval_test_steps_per_second": 4.773, "step": 7500 }, { "epoch": 56.57, "learning_rate": 1.125e-05, "loss": 0.206, "step": 7750 }, { "epoch": 56.57, "eval_test_accuracy": 0.5995085995085995, "eval_test_loss": 0.9860948920249939, "eval_test_runtime": 4.1715, "eval_test_samples_per_second": 292.702, "eval_test_steps_per_second": 4.794, "step": 7750 }, { "epoch": 58.39, "learning_rate": 1e-05, "loss": 0.2057, "step": 8000 }, { "epoch": 58.39, "eval_test_accuracy": 0.5962325962325963, "eval_test_loss": 1.0094884634017944, "eval_test_runtime": 4.2216, "eval_test_samples_per_second": 289.23, "eval_test_steps_per_second": 4.738, "step": 8000 }, { "epoch": 60.22, "learning_rate": 8.75e-06, "loss": 0.2023, "step": 8250 }, { "epoch": 60.22, "eval_test_accuracy": 0.597051597051597, "eval_test_loss": 1.000124216079712, "eval_test_runtime": 4.1702, "eval_test_samples_per_second": 292.793, "eval_test_steps_per_second": 4.796, "step": 8250 }, { "epoch": 62.04, "learning_rate": 7.5e-06, "loss": 0.1994, "step": 8500 }, { "epoch": 62.04, "eval_test_accuracy": 0.5995085995085995, "eval_test_loss": 1.0179657936096191, "eval_test_runtime": 4.1982, "eval_test_samples_per_second": 290.842, "eval_test_steps_per_second": 4.764, "step": 8500 }, { "epoch": 63.87, "learning_rate": 6.25e-06, "loss": 0.1967, "step": 8750 }, { "epoch": 63.87, "eval_test_accuracy": 0.6044226044226044, "eval_test_loss": 1.0143113136291504, "eval_test_runtime": 4.1544, "eval_test_samples_per_second": 293.907, "eval_test_steps_per_second": 4.814, "step": 8750 }, { "epoch": 65.69, "learning_rate": 5e-06, "loss": 0.1915, "step": 9000 }, { "epoch": 65.69, "eval_test_accuracy": 0.6011466011466011, "eval_test_loss": 1.0377224683761597, "eval_test_runtime": 4.1791, "eval_test_samples_per_second": 292.165, "eval_test_steps_per_second": 4.786, "step": 9000 }, { "epoch": 67.52, "learning_rate": 3.75e-06, "loss": 0.1934, "step": 9250 }, { "epoch": 67.52, "eval_test_accuracy": 0.601965601965602, "eval_test_loss": 1.02960205078125, "eval_test_runtime": 4.2049, "eval_test_samples_per_second": 290.372, "eval_test_steps_per_second": 4.756, "step": 9250 }, { "epoch": 69.34, "learning_rate": 2.5e-06, "loss": 0.1932, "step": 9500 }, { "epoch": 69.34, "eval_test_accuracy": 0.601965601965602, "eval_test_loss": 1.0294890403747559, "eval_test_runtime": 4.1796, "eval_test_samples_per_second": 292.135, "eval_test_steps_per_second": 4.785, "step": 9500 }, { "epoch": 71.17, "learning_rate": 1.25e-06, "loss": 0.1898, "step": 9750 }, { "epoch": 71.17, "eval_test_accuracy": 0.6011466011466011, "eval_test_loss": 1.0313055515289307, "eval_test_runtime": 4.1318, "eval_test_samples_per_second": 295.51, "eval_test_steps_per_second": 4.84, "step": 9750 }, { "epoch": 72.99, "learning_rate": 0.0, "loss": 0.1916, "step": 10000 }, { "epoch": 72.99, "eval_test_accuracy": 0.6011466011466011, "eval_test_loss": 1.0304898023605347, "eval_test_runtime": 4.1756, "eval_test_samples_per_second": 292.415, "eval_test_steps_per_second": 4.79, "step": 10000 } ], "max_steps": 10000, "num_train_epochs": 73, "total_flos": 0.0, "trial_name": null, "trial_params": null }