|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9775689803480447, |
|
"global_step": 45000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8e-05, |
|
"loss": 1.5093, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.91079194451259e-05, |
|
"loss": 1.2959, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.82158388902518e-05, |
|
"loss": 1.2604, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.732375833537769e-05, |
|
"loss": 1.2426, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.643167778050358e-05, |
|
"loss": 1.2368, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.553959722562947e-05, |
|
"loss": 1.2252, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.464751667075538e-05, |
|
"loss": 1.2204, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.375543611588127e-05, |
|
"loss": 1.2146, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.286335556100716e-05, |
|
"loss": 1.2081, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.197127500613307e-05, |
|
"loss": 1.2069, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.107919445125896e-05, |
|
"loss": 1.2017, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.018711389638485e-05, |
|
"loss": 1.2015, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.929503334151074e-05, |
|
"loss": 1.1906, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.840295278663664e-05, |
|
"loss": 1.1962, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 6.751087223176253e-05, |
|
"loss": 1.1939, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 6.661879167688842e-05, |
|
"loss": 1.1844, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 6.572671112201433e-05, |
|
"loss": 1.1923, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.483463056714022e-05, |
|
"loss": 1.1864, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.394255001226611e-05, |
|
"loss": 1.1801, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.305046945739202e-05, |
|
"loss": 1.1832, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5757644176483154, |
|
"eval_loss": 1.1787368059158325, |
|
"eval_runtime": 214.7637, |
|
"eval_samples_per_second": 4547.505, |
|
"eval_steps_per_second": 15.161, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.215838890251789e-05, |
|
"loss": 1.1804, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.12663083476438e-05, |
|
"loss": 1.1813, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 6.037422779276969e-05, |
|
"loss": 1.1795, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5.9482147237895586e-05, |
|
"loss": 1.178, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.8590066683021484e-05, |
|
"loss": 1.1815, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.769798612814738e-05, |
|
"loss": 1.1745, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.680590557327327e-05, |
|
"loss": 1.1744, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.591382501839917e-05, |
|
"loss": 1.1745, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.5021744463525054e-05, |
|
"loss": 1.1712, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.412966390865095e-05, |
|
"loss": 1.1686, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.323758335377685e-05, |
|
"loss": 1.1572, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.234550279890275e-05, |
|
"loss": 1.1503, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.145342224402864e-05, |
|
"loss": 1.1519, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5.0561341689154536e-05, |
|
"loss": 1.1502, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.9669261134280434e-05, |
|
"loss": 1.1465, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.877718057940632e-05, |
|
"loss": 1.1534, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.7885100024532215e-05, |
|
"loss": 1.1505, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.699301946965811e-05, |
|
"loss": 1.1492, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.610093891478401e-05, |
|
"loss": 1.1518, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.52088583599099e-05, |
|
"loss": 1.1492, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.5826605558395386, |
|
"eval_loss": 1.1605184078216553, |
|
"eval_runtime": 207.3392, |
|
"eval_samples_per_second": 4710.344, |
|
"eval_steps_per_second": 15.704, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.43167778050358e-05, |
|
"loss": 1.1465, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.34246972501617e-05, |
|
"loss": 1.1472, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.2532616695287595e-05, |
|
"loss": 1.1507, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.164053614041348e-05, |
|
"loss": 1.1466, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.074845558553938e-05, |
|
"loss": 1.1479, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.985637503066527e-05, |
|
"loss": 1.1425, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.8964294475791165e-05, |
|
"loss": 1.1497, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.807221392091706e-05, |
|
"loss": 1.1394, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.718013336604296e-05, |
|
"loss": 1.1472, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.628805281116885e-05, |
|
"loss": 1.1444, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.539597225629475e-05, |
|
"loss": 1.1439, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.450389170142064e-05, |
|
"loss": 1.1468, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.361181114654654e-05, |
|
"loss": 1.1467, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.271973059167243e-05, |
|
"loss": 1.1417, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.182765003679833e-05, |
|
"loss": 1.1423, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.0935569481924224e-05, |
|
"loss": 1.1444, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.0043488927050112e-05, |
|
"loss": 1.1382, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.915140837217601e-05, |
|
"loss": 1.1392, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.8259327817301904e-05, |
|
"loss": 1.1407, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.73672472624278e-05, |
|
"loss": 1.143, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.5862544775009155, |
|
"eval_loss": 1.1498275995254517, |
|
"eval_runtime": 205.0061, |
|
"eval_samples_per_second": 4763.951, |
|
"eval_steps_per_second": 15.882, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.6475166707553693e-05, |
|
"loss": 1.1246, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.5583086152679587e-05, |
|
"loss": 1.1176, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.4691005597805485e-05, |
|
"loss": 1.1131, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.379892504293138e-05, |
|
"loss": 1.1106, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.2906844488057273e-05, |
|
"loss": 1.1104, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.2014763933183168e-05, |
|
"loss": 1.1127, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.1122683378309062e-05, |
|
"loss": 1.1113, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.023060282343496e-05, |
|
"loss": 1.1151, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.9338522268560854e-05, |
|
"loss": 1.1177, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.8446441713686748e-05, |
|
"loss": 1.113, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.7554361158812642e-05, |
|
"loss": 1.11, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.6662280603938537e-05, |
|
"loss": 1.1172, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.577020004906443e-05, |
|
"loss": 1.1127, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.4878119494190325e-05, |
|
"loss": 1.1116, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.3986038939316221e-05, |
|
"loss": 1.1093, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.3093958384442116e-05, |
|
"loss": 1.1152, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.2201877829568012e-05, |
|
"loss": 1.1084, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.1309797274693906e-05, |
|
"loss": 1.1095, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.0417716719819802e-05, |
|
"loss": 1.1071, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.525636164945695e-06, |
|
"loss": 1.1088, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.5878129005432129, |
|
"eval_loss": 1.149354338645935, |
|
"eval_runtime": 211.165, |
|
"eval_samples_per_second": 4625.004, |
|
"eval_steps_per_second": 15.419, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 8.63355561007159e-06, |
|
"loss": 1.1103, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 7.741475055197485e-06, |
|
"loss": 1.113, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 6.849394500323379e-06, |
|
"loss": 1.108, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 5.9573139454492745e-06, |
|
"loss": 1.1082, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.06523339057517e-06, |
|
"loss": 1.1121, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.173152835701064e-06, |
|
"loss": 1.1051, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.281072280826959e-06, |
|
"loss": 1.1057, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.3889917259528535e-06, |
|
"loss": 1.1155, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.4969111710787484e-06, |
|
"loss": 1.1102, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 6.048306162046434e-07, |
|
"loss": 1.1091, |
|
"step": 45000 |
|
} |
|
], |
|
"max_steps": 45339, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.1008030583503237e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|