{ "best_metric": 0.42281684279441833, "best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-3000", "epoch": 4.0, "global_step": 12352, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_accuracy": 0.5764999985694885, "eval_loss": 0.6765033006668091, "eval_runtime": 0.5222, "eval_samples_per_second": 3830.009, "eval_steps_per_second": 239.376, "step": 300 }, { "epoch": 0.16, "learning_rate": 4.798413212435233e-05, "loss": 0.6642, "step": 500 }, { "epoch": 0.19, "eval_accuracy": 0.7055000066757202, "eval_loss": 0.5674529671669006, "eval_runtime": 1.3054, "eval_samples_per_second": 1532.102, "eval_steps_per_second": 95.756, "step": 600 }, { "epoch": 0.29, "eval_accuracy": 0.737500011920929, "eval_loss": 0.5434138178825378, "eval_runtime": 0.5173, "eval_samples_per_second": 3866.263, "eval_steps_per_second": 241.641, "step": 900 }, { "epoch": 0.32, "learning_rate": 4.596421632124353e-05, "loss": 0.5097, "step": 1000 }, { "epoch": 0.39, "eval_accuracy": 0.7609999775886536, "eval_loss": 0.523171603679657, "eval_runtime": 1.4935, "eval_samples_per_second": 1339.147, "eval_steps_per_second": 83.697, "step": 1200 }, { "epoch": 0.49, "learning_rate": 4.394025259067357e-05, "loss": 0.4473, "step": 1500 }, { "epoch": 0.49, "eval_accuracy": 0.7864999771118164, "eval_loss": 0.46667101979255676, "eval_runtime": 0.5272, "eval_samples_per_second": 3793.714, "eval_steps_per_second": 237.107, "step": 1500 }, { "epoch": 0.58, "eval_accuracy": 0.8009999990463257, "eval_loss": 0.4371644854545593, "eval_runtime": 0.5192, "eval_samples_per_second": 3852.095, "eval_steps_per_second": 240.756, "step": 1800 }, { "epoch": 0.65, "learning_rate": 4.191628886010363e-05, "loss": 0.3934, "step": 2000 }, { "epoch": 0.68, "eval_accuracy": 0.7894999980926514, "eval_loss": 0.4638134837150574, "eval_runtime": 0.5279, "eval_samples_per_second": 3788.294, "eval_steps_per_second": 236.768, "step": 2100 }, { "epoch": 0.78, "eval_accuracy": 0.7885000109672546, "eval_loss": 0.4593009650707245, "eval_runtime": 0.5306, "eval_samples_per_second": 3769.222, "eval_steps_per_second": 235.576, "step": 2400 }, { "epoch": 0.81, "learning_rate": 3.989637305699482e-05, "loss": 0.3659, "step": 2500 }, { "epoch": 0.87, "eval_accuracy": 0.7960000038146973, "eval_loss": 0.4648894965648651, "eval_runtime": 0.5101, "eval_samples_per_second": 3920.44, "eval_steps_per_second": 245.027, "step": 2700 }, { "epoch": 0.97, "learning_rate": 3.787240932642487e-05, "loss": 0.3408, "step": 3000 }, { "epoch": 0.97, "eval_accuracy": 0.809499979019165, "eval_loss": 0.42281684279441833, "eval_runtime": 1.3845, "eval_samples_per_second": 1444.57, "eval_steps_per_second": 90.286, "step": 3000 }, { "epoch": 1.07, "eval_accuracy": 0.8059999942779541, "eval_loss": 0.4953405261039734, "eval_runtime": 0.5733, "eval_samples_per_second": 3488.535, "eval_steps_per_second": 218.033, "step": 3300 }, { "epoch": 1.13, "learning_rate": 3.5848445595854926e-05, "loss": 0.2812, "step": 3500 }, { "epoch": 1.17, "eval_accuracy": 0.7994999885559082, "eval_loss": 0.5381343364715576, "eval_runtime": 0.5151, "eval_samples_per_second": 3882.955, "eval_steps_per_second": 242.685, "step": 3600 }, { "epoch": 1.26, "eval_accuracy": 0.8054999709129333, "eval_loss": 0.4457036256790161, "eval_runtime": 0.5211, "eval_samples_per_second": 3838.159, "eval_steps_per_second": 239.885, "step": 3900 }, { "epoch": 1.3, "learning_rate": 3.382448186528497e-05, "loss": 0.2545, "step": 4000 }, { "epoch": 1.36, "eval_accuracy": 0.8149999976158142, "eval_loss": 0.4693449139595032, "eval_runtime": 0.5216, "eval_samples_per_second": 3834.156, "eval_steps_per_second": 239.635, "step": 4200 }, { "epoch": 1.46, "learning_rate": 3.180051813471503e-05, "loss": 0.2588, "step": 4500 }, { "epoch": 1.46, "eval_accuracy": 0.8220000267028809, "eval_loss": 0.48552772402763367, "eval_runtime": 2.4236, "eval_samples_per_second": 825.207, "eval_steps_per_second": 51.575, "step": 4500 }, { "epoch": 1.55, "eval_accuracy": 0.8184999823570251, "eval_loss": 0.48397132754325867, "eval_runtime": 0.5143, "eval_samples_per_second": 3888.874, "eval_steps_per_second": 243.055, "step": 4800 }, { "epoch": 1.62, "learning_rate": 2.9780602331606216e-05, "loss": 0.2436, "step": 5000 }, { "epoch": 1.65, "eval_accuracy": 0.8080000281333923, "eval_loss": 0.5025840997695923, "eval_runtime": 0.5261, "eval_samples_per_second": 3801.816, "eval_steps_per_second": 237.613, "step": 5100 }, { "epoch": 1.75, "eval_accuracy": 0.8140000104904175, "eval_loss": 0.4753943681716919, "eval_runtime": 0.5312, "eval_samples_per_second": 3765.313, "eval_steps_per_second": 235.332, "step": 5400 }, { "epoch": 1.78, "learning_rate": 2.7756638601036272e-05, "loss": 0.2414, "step": 5500 }, { "epoch": 1.85, "eval_accuracy": 0.8034999966621399, "eval_loss": 0.48492932319641113, "eval_runtime": 0.5256, "eval_samples_per_second": 3805.048, "eval_steps_per_second": 237.815, "step": 5700 }, { "epoch": 1.94, "learning_rate": 2.573267487046632e-05, "loss": 0.2527, "step": 6000 }, { "epoch": 1.94, "eval_accuracy": 0.8100000023841858, "eval_loss": 0.48092204332351685, "eval_runtime": 0.52, "eval_samples_per_second": 3846.385, "eval_steps_per_second": 240.399, "step": 6000 }, { "epoch": 2.04, "eval_accuracy": 0.8134999871253967, "eval_loss": 0.5660321116447449, "eval_runtime": 0.5164, "eval_samples_per_second": 3873.13, "eval_steps_per_second": 242.071, "step": 6300 }, { "epoch": 2.1, "learning_rate": 2.3708711139896374e-05, "loss": 0.1818, "step": 6500 }, { "epoch": 2.14, "eval_accuracy": 0.8180000185966492, "eval_loss": 0.6352373361587524, "eval_runtime": 0.5235, "eval_samples_per_second": 3820.269, "eval_steps_per_second": 238.767, "step": 6600 }, { "epoch": 2.23, "eval_accuracy": 0.8209999799728394, "eval_loss": 0.6510393619537354, "eval_runtime": 0.5162, "eval_samples_per_second": 3874.656, "eval_steps_per_second": 242.166, "step": 6900 }, { "epoch": 2.27, "learning_rate": 2.1692843264248704e-05, "loss": 0.1776, "step": 7000 }, { "epoch": 2.33, "eval_accuracy": 0.8065000176429749, "eval_loss": 0.684609591960907, "eval_runtime": 0.5072, "eval_samples_per_second": 3943.283, "eval_steps_per_second": 246.455, "step": 7200 }, { "epoch": 2.43, "learning_rate": 1.9668879533678756e-05, "loss": 0.1754, "step": 7500 }, { "epoch": 2.43, "eval_accuracy": 0.8100000023841858, "eval_loss": 0.6463525295257568, "eval_runtime": 1.4644, "eval_samples_per_second": 1365.751, "eval_steps_per_second": 85.359, "step": 7500 }, { "epoch": 2.53, "eval_accuracy": 0.8125, "eval_loss": 0.6290258169174194, "eval_runtime": 3.6012, "eval_samples_per_second": 555.376, "eval_steps_per_second": 34.711, "step": 7800 }, { "epoch": 2.59, "learning_rate": 1.764491580310881e-05, "loss": 0.1763, "step": 8000 }, { "epoch": 2.62, "eval_accuracy": 0.8144999742507935, "eval_loss": 0.6613443493843079, "eval_runtime": 2.9055, "eval_samples_per_second": 688.341, "eval_steps_per_second": 43.021, "step": 8100 }, { "epoch": 2.72, "eval_accuracy": 0.8224999904632568, "eval_loss": 0.6761817336082458, "eval_runtime": 1.1073, "eval_samples_per_second": 1806.22, "eval_steps_per_second": 112.889, "step": 8400 }, { "epoch": 2.75, "learning_rate": 1.5620952072538862e-05, "loss": 0.1853, "step": 8500 }, { "epoch": 2.82, "eval_accuracy": 0.8119999766349792, "eval_loss": 0.6429938077926636, "eval_runtime": 0.5254, "eval_samples_per_second": 3806.417, "eval_steps_per_second": 237.901, "step": 8700 }, { "epoch": 2.91, "learning_rate": 1.3596988341968913e-05, "loss": 0.1804, "step": 9000 }, { "epoch": 2.91, "eval_accuracy": 0.8255000114440918, "eval_loss": 0.6622639298439026, "eval_runtime": 1.2718, "eval_samples_per_second": 1572.63, "eval_steps_per_second": 98.289, "step": 9000 }, { "epoch": 3.01, "eval_accuracy": 0.8245000243186951, "eval_loss": 0.6402567028999329, "eval_runtime": 0.5322, "eval_samples_per_second": 3757.904, "eval_steps_per_second": 234.869, "step": 9300 }, { "epoch": 3.08, "learning_rate": 1.1577072538860104e-05, "loss": 0.1321, "step": 9500 }, { "epoch": 3.11, "eval_accuracy": 0.8184999823570251, "eval_loss": 0.76594477891922, "eval_runtime": 1.2705, "eval_samples_per_second": 1574.241, "eval_steps_per_second": 98.39, "step": 9600 }, { "epoch": 3.21, "eval_accuracy": 0.8264999985694885, "eval_loss": 0.7427929043769836, "eval_runtime": 0.5386, "eval_samples_per_second": 3713.17, "eval_steps_per_second": 232.073, "step": 9900 }, { "epoch": 3.24, "learning_rate": 9.553108808290157e-06, "loss": 0.1135, "step": 10000 }, { "epoch": 3.3, "eval_accuracy": 0.8144999742507935, "eval_loss": 0.7777068018913269, "eval_runtime": 2.4153, "eval_samples_per_second": 828.039, "eval_steps_per_second": 51.752, "step": 10200 }, { "epoch": 3.4, "learning_rate": 7.529145077720208e-06, "loss": 0.1182, "step": 10500 }, { "epoch": 3.4, "eval_accuracy": 0.8199999928474426, "eval_loss": 0.7448311448097229, "eval_runtime": 0.5257, "eval_samples_per_second": 3804.49, "eval_steps_per_second": 237.781, "step": 10500 }, { "epoch": 3.5, "eval_accuracy": 0.824999988079071, "eval_loss": 0.7690622806549072, "eval_runtime": 0.5391, "eval_samples_per_second": 3709.781, "eval_steps_per_second": 231.861, "step": 10800 }, { "epoch": 3.56, "learning_rate": 5.50518134715026e-06, "loss": 0.1114, "step": 11000 }, { "epoch": 3.59, "eval_accuracy": 0.8195000290870667, "eval_loss": 0.7780025601387024, "eval_runtime": 0.5331, "eval_samples_per_second": 3751.575, "eval_steps_per_second": 234.473, "step": 11100 }, { "epoch": 3.69, "eval_accuracy": 0.8220000267028809, "eval_loss": 0.7819697260856628, "eval_runtime": 0.5267, "eval_samples_per_second": 3797.154, "eval_steps_per_second": 237.322, "step": 11400 }, { "epoch": 3.72, "learning_rate": 3.4852655440414507e-06, "loss": 0.0992, "step": 11500 }, { "epoch": 3.79, "eval_accuracy": 0.8215000033378601, "eval_loss": 0.79557865858078, "eval_runtime": 0.537, "eval_samples_per_second": 3724.493, "eval_steps_per_second": 232.781, "step": 11700 }, { "epoch": 3.89, "learning_rate": 1.4613018134715026e-06, "loss": 0.1068, "step": 12000 }, { "epoch": 3.89, "eval_accuracy": 0.8224999904632568, "eval_loss": 0.793406069278717, "eval_runtime": 0.5363, "eval_samples_per_second": 3729.122, "eval_steps_per_second": 233.07, "step": 12000 }, { "epoch": 3.98, "eval_accuracy": 0.8215000033378601, "eval_loss": 0.8001092672348022, "eval_runtime": 0.5353, "eval_samples_per_second": 3736.045, "eval_steps_per_second": 233.503, "step": 12300 }, { "epoch": 4.0, "step": 12352, "total_flos": 144723222180000.0, "train_loss": 0.24578420763806358, "train_runtime": 1895.6753, "train_samples_per_second": 104.239, "train_steps_per_second": 6.516 } ], "max_steps": 12352, "num_train_epochs": 4, "total_flos": 144723222180000.0, "trial_name": null, "trial_params": null }