{ "best_metric": 0.9115511551155115, "best_model_checkpoint": "food-image-classification/checkpoint-35000", "epoch": 54.91024287222809, "eval_steps": 1000, "global_step": 52000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.06, "learning_rate": 1.0559662090813095e-06, "loss": 4.6112, "step": 1000 }, { "epoch": 1.06, "eval_accuracy": 0.034851485148514855, "eval_loss": 4.575930118560791, "eval_runtime": 157.053, "eval_samples_per_second": 96.464, "eval_steps_per_second": 6.03, "step": 1000 }, { "epoch": 2.11, "learning_rate": 2.111932418162619e-06, "loss": 4.4899, "step": 2000 }, { "epoch": 2.11, "eval_accuracy": 0.3103630363036304, "eval_loss": 4.3788862228393555, "eval_runtime": 154.3948, "eval_samples_per_second": 98.125, "eval_steps_per_second": 6.134, "step": 2000 }, { "epoch": 3.17, "learning_rate": 3.167898627243928e-06, "loss": 4.2111, "step": 3000 }, { "epoch": 3.17, "eval_accuracy": 0.5498349834983498, "eval_loss": 4.030922889709473, "eval_runtime": 155.2257, "eval_samples_per_second": 97.6, "eval_steps_per_second": 6.101, "step": 3000 }, { "epoch": 4.22, "learning_rate": 4.223864836325238e-06, "loss": 3.8257, "step": 4000 }, { "epoch": 4.22, "eval_accuracy": 0.6111551155115511, "eval_loss": 3.634243965148926, "eval_runtime": 156.2293, "eval_samples_per_second": 96.973, "eval_steps_per_second": 6.062, "step": 4000 }, { "epoch": 5.28, "learning_rate": 5.279831045406547e-06, "loss": 3.4182, "step": 5000 }, { "epoch": 5.28, "eval_accuracy": 0.6514851485148515, "eval_loss": 3.225186586380005, "eval_runtime": 154.6911, "eval_samples_per_second": 97.937, "eval_steps_per_second": 6.122, "step": 5000 }, { "epoch": 6.34, "learning_rate": 6.335797254487856e-06, "loss": 2.9962, "step": 6000 }, { "epoch": 6.34, "eval_accuracy": 0.687062706270627, "eval_loss": 2.805878162384033, "eval_runtime": 154.6826, "eval_samples_per_second": 97.943, "eval_steps_per_second": 6.122, "step": 6000 }, { "epoch": 7.39, "learning_rate": 7.3917634635691666e-06, "loss": 2.5605, "step": 7000 }, { "epoch": 7.39, "eval_accuracy": 0.7070627062706271, "eval_loss": 2.382246494293213, "eval_runtime": 155.5967, "eval_samples_per_second": 97.367, "eval_steps_per_second": 6.086, "step": 7000 }, { "epoch": 8.45, "learning_rate": 8.447729672650476e-06, "loss": 2.1397, "step": 8000 }, { "epoch": 8.45, "eval_accuracy": 0.7335973597359736, "eval_loss": 1.975380539894104, "eval_runtime": 156.8793, "eval_samples_per_second": 96.571, "eval_steps_per_second": 6.036, "step": 8000 }, { "epoch": 9.5, "learning_rate": 9.503695881731786e-06, "loss": 1.7383, "step": 9000 }, { "epoch": 9.5, "eval_accuracy": 0.7576897689768977, "eval_loss": 1.608676552772522, "eval_runtime": 154.5661, "eval_samples_per_second": 98.016, "eval_steps_per_second": 6.127, "step": 9000 }, { "epoch": 10.56, "learning_rate": 1.0559662090813093e-05, "loss": 1.3909, "step": 10000 }, { "epoch": 10.56, "eval_accuracy": 0.7758415841584159, "eval_loss": 1.3203929662704468, "eval_runtime": 155.6061, "eval_samples_per_second": 97.361, "eval_steps_per_second": 6.086, "step": 10000 }, { "epoch": 11.62, "learning_rate": 1.1615628299894405e-05, "loss": 1.1223, "step": 11000 }, { "epoch": 11.62, "eval_accuracy": 0.7831683168316832, "eval_loss": 1.1283260583877563, "eval_runtime": 153.8564, "eval_samples_per_second": 98.468, "eval_steps_per_second": 6.155, "step": 11000 }, { "epoch": 12.67, "learning_rate": 1.2671594508975712e-05, "loss": 0.9312, "step": 12000 }, { "epoch": 12.67, "eval_accuracy": 0.7946534653465347, "eval_loss": 0.9766868352890015, "eval_runtime": 155.0819, "eval_samples_per_second": 97.69, "eval_steps_per_second": 6.106, "step": 12000 }, { "epoch": 13.73, "learning_rate": 1.3727560718057022e-05, "loss": 0.7817, "step": 13000 }, { "epoch": 13.73, "eval_accuracy": 0.7984158415841585, "eval_loss": 0.8917332887649536, "eval_runtime": 154.0819, "eval_samples_per_second": 98.324, "eval_steps_per_second": 6.146, "step": 13000 }, { "epoch": 14.78, "learning_rate": 1.4783526927138333e-05, "loss": 0.697, "step": 14000 }, { "epoch": 14.78, "eval_accuracy": 0.8043564356435644, "eval_loss": 0.8234559297561646, "eval_runtime": 153.6348, "eval_samples_per_second": 98.61, "eval_steps_per_second": 6.164, "step": 14000 }, { "epoch": 15.84, "learning_rate": 1.583949313621964e-05, "loss": 0.6281, "step": 15000 }, { "epoch": 15.84, "eval_accuracy": 0.8075247524752476, "eval_loss": 0.7959182858467102, "eval_runtime": 153.9402, "eval_samples_per_second": 98.415, "eval_steps_per_second": 6.152, "step": 15000 }, { "epoch": 16.9, "learning_rate": 1.6895459345300952e-05, "loss": 0.5659, "step": 16000 }, { "epoch": 16.9, "eval_accuracy": 0.8158415841584158, "eval_loss": 0.7547946572303772, "eval_runtime": 155.6477, "eval_samples_per_second": 97.335, "eval_steps_per_second": 6.084, "step": 16000 }, { "epoch": 17.95, "learning_rate": 1.795142555438226e-05, "loss": 0.5198, "step": 17000 }, { "epoch": 17.95, "eval_accuracy": 0.8104950495049505, "eval_loss": 0.7739244103431702, "eval_runtime": 155.4026, "eval_samples_per_second": 97.489, "eval_steps_per_second": 6.094, "step": 17000 }, { "epoch": 19.01, "learning_rate": 1.900739176346357e-05, "loss": 0.4951, "step": 18000 }, { "epoch": 19.01, "eval_accuracy": 0.8151815181518152, "eval_loss": 0.7517885565757751, "eval_runtime": 153.2443, "eval_samples_per_second": 98.862, "eval_steps_per_second": 6.18, "step": 18000 }, { "epoch": 20.06, "learning_rate": 2.006335797254488e-05, "loss": 0.4656, "step": 19000 }, { "epoch": 20.06, "eval_accuracy": 0.8158415841584158, "eval_loss": 0.7491214275360107, "eval_runtime": 155.8688, "eval_samples_per_second": 97.197, "eval_steps_per_second": 6.076, "step": 19000 }, { "epoch": 21.12, "learning_rate": 2.1119324181626187e-05, "loss": 0.4385, "step": 20000 }, { "epoch": 21.12, "eval_accuracy": 0.8151815181518152, "eval_loss": 0.7403990030288696, "eval_runtime": 154.5972, "eval_samples_per_second": 97.997, "eval_steps_per_second": 6.126, "step": 20000 }, { "epoch": 22.18, "learning_rate": 2.21752903907075e-05, "loss": 0.4148, "step": 21000 }, { "epoch": 22.18, "eval_accuracy": 0.8112871287128713, "eval_loss": 0.7465632557868958, "eval_runtime": 155.3899, "eval_samples_per_second": 97.497, "eval_steps_per_second": 6.094, "step": 21000 }, { "epoch": 23.23, "learning_rate": 2.323125659978881e-05, "loss": 0.3926, "step": 22000 }, { "epoch": 23.23, "eval_accuracy": 0.8201980198019801, "eval_loss": 0.7243059873580933, "eval_runtime": 153.8982, "eval_samples_per_second": 98.442, "eval_steps_per_second": 6.153, "step": 22000 }, { "epoch": 24.29, "learning_rate": 2.4287222808870115e-05, "loss": 0.3785, "step": 23000 }, { "epoch": 24.29, "eval_accuracy": 0.8103630363036304, "eval_loss": 0.7593609690666199, "eval_runtime": 155.7313, "eval_samples_per_second": 97.283, "eval_steps_per_second": 6.081, "step": 23000 }, { "epoch": 25.34, "learning_rate": 2.5343189017951425e-05, "loss": 0.3574, "step": 24000 }, { "epoch": 25.34, "eval_accuracy": 0.815973597359736, "eval_loss": 0.7465734481811523, "eval_runtime": 154.8193, "eval_samples_per_second": 97.856, "eval_steps_per_second": 6.117, "step": 24000 }, { "epoch": 26.4, "learning_rate": 2.6399155227032734e-05, "loss": 0.3438, "step": 25000 }, { "epoch": 26.4, "eval_accuracy": 0.8155775577557756, "eval_loss": 0.7651433944702148, "eval_runtime": 154.1129, "eval_samples_per_second": 98.305, "eval_steps_per_second": 6.145, "step": 25000 }, { "epoch": 27.46, "learning_rate": 2.7455121436114044e-05, "loss": 0.3274, "step": 26000 }, { "epoch": 27.46, "eval_accuracy": 0.8148514851485148, "eval_loss": 0.760901927947998, "eval_runtime": 155.6779, "eval_samples_per_second": 97.316, "eval_steps_per_second": 6.083, "step": 26000 }, { "epoch": 28.51, "learning_rate": 1.0559662090813095e-06, "loss": 0.3793, "step": 27000 }, { "epoch": 28.51, "eval_accuracy": 0.9097029702970297, "eval_loss": 0.3658629059791565, "eval_runtime": 156.004, "eval_samples_per_second": 97.113, "eval_steps_per_second": 6.07, "step": 27000 }, { "epoch": 29.57, "learning_rate": 2.111932418162619e-06, "loss": 0.355, "step": 28000 }, { "epoch": 29.57, "eval_accuracy": 0.9113531353135313, "eval_loss": 0.35932043194770813, "eval_runtime": 156.9177, "eval_samples_per_second": 96.547, "eval_steps_per_second": 6.035, "step": 28000 }, { "epoch": 30.62, "learning_rate": 3.167898627243928e-06, "loss": 0.3494, "step": 29000 }, { "epoch": 30.62, "eval_accuracy": 0.9111551155115512, "eval_loss": 0.3594682812690735, "eval_runtime": 154.85, "eval_samples_per_second": 97.837, "eval_steps_per_second": 6.116, "step": 29000 }, { "epoch": 31.68, "learning_rate": 4.223864836325238e-06, "loss": 0.3297, "step": 30000 }, { "epoch": 31.68, "eval_accuracy": 0.9101650165016502, "eval_loss": 0.35688260197639465, "eval_runtime": 155.9707, "eval_samples_per_second": 97.134, "eval_steps_per_second": 6.072, "step": 30000 }, { "epoch": 32.73, "learning_rate": 5.279831045406547e-06, "loss": 0.3252, "step": 31000 }, { "epoch": 32.73, "eval_accuracy": 0.9083168316831683, "eval_loss": 0.3627123236656189, "eval_runtime": 156.6773, "eval_samples_per_second": 96.696, "eval_steps_per_second": 6.044, "step": 31000 }, { "epoch": 33.79, "learning_rate": 6.335797254487856e-06, "loss": 0.3189, "step": 32000 }, { "epoch": 33.79, "eval_accuracy": 0.9108910891089109, "eval_loss": 0.35579344630241394, "eval_runtime": 155.255, "eval_samples_per_second": 97.581, "eval_steps_per_second": 6.1, "step": 32000 }, { "epoch": 34.85, "learning_rate": 7.3917634635691666e-06, "loss": 0.3064, "step": 33000 }, { "epoch": 34.85, "eval_accuracy": 0.9067326732673268, "eval_loss": 0.3623407185077667, "eval_runtime": 155.6758, "eval_samples_per_second": 97.318, "eval_steps_per_second": 6.083, "step": 33000 }, { "epoch": 35.9, "learning_rate": 8.447729672650476e-06, "loss": 0.3, "step": 34000 }, { "epoch": 35.9, "eval_accuracy": 0.9073927392739274, "eval_loss": 0.36413270235061646, "eval_runtime": 156.8691, "eval_samples_per_second": 96.577, "eval_steps_per_second": 6.037, "step": 34000 }, { "epoch": 36.96, "learning_rate": 9.503695881731786e-06, "loss": 0.289, "step": 35000 }, { "epoch": 36.96, "eval_accuracy": 0.9115511551155115, "eval_loss": 0.34877872467041016, "eval_runtime": 156.4541, "eval_samples_per_second": 96.834, "eval_steps_per_second": 6.053, "step": 35000 }, { "epoch": 38.01, "learning_rate": 1.0559662090813093e-05, "loss": 0.2811, "step": 36000 }, { "epoch": 38.01, "eval_accuracy": 0.90996699669967, "eval_loss": 0.3593011796474457, "eval_runtime": 155.6871, "eval_samples_per_second": 97.311, "eval_steps_per_second": 6.083, "step": 36000 }, { "epoch": 39.07, "learning_rate": 1.1615628299894405e-05, "loss": 0.2674, "step": 37000 }, { "epoch": 39.07, "eval_accuracy": 0.9037623762376238, "eval_loss": 0.37522387504577637, "eval_runtime": 155.8581, "eval_samples_per_second": 97.204, "eval_steps_per_second": 6.076, "step": 37000 }, { "epoch": 40.13, "learning_rate": 1.2671594508975712e-05, "loss": 0.2644, "step": 38000 }, { "epoch": 40.13, "eval_accuracy": 0.9054785478547854, "eval_loss": 0.3814030885696411, "eval_runtime": 155.1819, "eval_samples_per_second": 97.627, "eval_steps_per_second": 6.103, "step": 38000 }, { "epoch": 41.18, "learning_rate": 1.3727560718057022e-05, "loss": 0.2585, "step": 39000 }, { "epoch": 41.18, "eval_accuracy": 0.9052805280528052, "eval_loss": 0.3803286850452423, "eval_runtime": 154.4972, "eval_samples_per_second": 98.06, "eval_steps_per_second": 6.13, "step": 39000 }, { "epoch": 42.24, "learning_rate": 1.4783526927138333e-05, "loss": 0.2581, "step": 40000 }, { "epoch": 42.24, "eval_accuracy": 0.9038283828382838, "eval_loss": 0.37817618250846863, "eval_runtime": 156.2669, "eval_samples_per_second": 96.95, "eval_steps_per_second": 6.06, "step": 40000 }, { "epoch": 43.29, "learning_rate": 1.583949313621964e-05, "loss": 0.2516, "step": 41000 }, { "epoch": 43.29, "eval_accuracy": 0.9011221122112211, "eval_loss": 0.39773184061050415, "eval_runtime": 155.7863, "eval_samples_per_second": 97.249, "eval_steps_per_second": 6.079, "step": 41000 }, { "epoch": 44.35, "learning_rate": 1.6895459345300952e-05, "loss": 0.2431, "step": 42000 }, { "epoch": 44.35, "eval_accuracy": 0.8992079207920792, "eval_loss": 0.40550053119659424, "eval_runtime": 154.9661, "eval_samples_per_second": 97.763, "eval_steps_per_second": 6.111, "step": 42000 }, { "epoch": 45.41, "learning_rate": 1.795142555438226e-05, "loss": 0.2429, "step": 43000 }, { "epoch": 45.41, "eval_accuracy": 0.8975577557755775, "eval_loss": 0.4172586500644684, "eval_runtime": 154.5817, "eval_samples_per_second": 98.006, "eval_steps_per_second": 6.126, "step": 43000 }, { "epoch": 46.46, "learning_rate": 1.900739176346357e-05, "loss": 0.2406, "step": 44000 }, { "epoch": 46.46, "eval_accuracy": 0.893993399339934, "eval_loss": 0.4206344783306122, "eval_runtime": 156.2155, "eval_samples_per_second": 96.981, "eval_steps_per_second": 6.062, "step": 44000 }, { "epoch": 47.52, "learning_rate": 2.006335797254488e-05, "loss": 0.2351, "step": 45000 }, { "epoch": 47.52, "eval_accuracy": 0.8926732673267327, "eval_loss": 0.4330624043941498, "eval_runtime": 154.5274, "eval_samples_per_second": 98.041, "eval_steps_per_second": 6.128, "step": 45000 }, { "epoch": 48.57, "learning_rate": 2.1119324181626187e-05, "loss": 0.2333, "step": 46000 }, { "epoch": 48.57, "eval_accuracy": 0.8938613861386139, "eval_loss": 0.43345457315444946, "eval_runtime": 155.962, "eval_samples_per_second": 97.139, "eval_steps_per_second": 6.072, "step": 46000 }, { "epoch": 49.63, "learning_rate": 2.21752903907075e-05, "loss": 0.2231, "step": 47000 }, { "epoch": 49.63, "eval_accuracy": 0.8863366336633663, "eval_loss": 0.45071929693222046, "eval_runtime": 156.5229, "eval_samples_per_second": 96.791, "eval_steps_per_second": 6.05, "step": 47000 }, { "epoch": 50.69, "learning_rate": 2.323125659978881e-05, "loss": 0.2247, "step": 48000 }, { "epoch": 50.69, "eval_accuracy": 0.8875907590759076, "eval_loss": 0.4481562674045563, "eval_runtime": 155.8513, "eval_samples_per_second": 97.208, "eval_steps_per_second": 6.076, "step": 48000 }, { "epoch": 51.74, "learning_rate": 2.4287222808870115e-05, "loss": 0.2201, "step": 49000 }, { "epoch": 51.74, "eval_accuracy": 0.8867326732673267, "eval_loss": 0.45674923062324524, "eval_runtime": 154.5848, "eval_samples_per_second": 98.004, "eval_steps_per_second": 6.126, "step": 49000 }, { "epoch": 52.8, "learning_rate": 2.5343189017951425e-05, "loss": 0.2166, "step": 50000 }, { "epoch": 52.8, "eval_accuracy": 0.8831683168316832, "eval_loss": 0.460601806640625, "eval_runtime": 156.293, "eval_samples_per_second": 96.933, "eval_steps_per_second": 6.059, "step": 50000 }, { "epoch": 53.85, "learning_rate": 2.6399155227032734e-05, "loss": 0.2174, "step": 51000 }, { "epoch": 53.85, "eval_accuracy": 0.8804620462046204, "eval_loss": 0.4750025272369385, "eval_runtime": 155.6344, "eval_samples_per_second": 97.344, "eval_steps_per_second": 6.085, "step": 51000 }, { "epoch": 54.91, "learning_rate": 2.7455121436114044e-05, "loss": 0.2164, "step": 52000 }, { "epoch": 54.91, "eval_accuracy": 0.8831023102310231, "eval_loss": 0.4645076394081116, "eval_runtime": 156.6057, "eval_samples_per_second": 96.74, "eval_steps_per_second": 6.047, "step": 52000 } ], "logging_steps": 1000, "max_steps": 473500, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 1000, "total_flos": 2.5808866542217573e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }