|
{ |
|
"best_metric": 1.0146315097808838, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 1.262044653349001, |
|
"eval_steps": 25, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01253427340383862, |
|
"grad_norm": 1.065430760383606, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4767, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01253427340383862, |
|
"eval_loss": 1.9253945350646973, |
|
"eval_runtime": 0.5058, |
|
"eval_samples_per_second": 98.851, |
|
"eval_steps_per_second": 25.701, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02506854680767724, |
|
"grad_norm": 1.6225430965423584, |
|
"learning_rate": 4e-05, |
|
"loss": 1.3994, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03760282021151586, |
|
"grad_norm": 1.2386142015457153, |
|
"learning_rate": 6e-05, |
|
"loss": 1.415, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05013709361535448, |
|
"grad_norm": 0.6819537878036499, |
|
"learning_rate": 8e-05, |
|
"loss": 1.4403, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06267136701919311, |
|
"grad_norm": 0.8419666886329651, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3604, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07520564042303172, |
|
"grad_norm": 0.9854099750518799, |
|
"learning_rate": 9.997539658034168e-05, |
|
"loss": 1.3129, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.08773991382687035, |
|
"grad_norm": 0.8464058041572571, |
|
"learning_rate": 9.990161322484486e-05, |
|
"loss": 1.3227, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.10027418723070897, |
|
"grad_norm": 0.902024507522583, |
|
"learning_rate": 9.977873061452552e-05, |
|
"loss": 1.3704, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.1128084606345476, |
|
"grad_norm": 1.2437635660171509, |
|
"learning_rate": 9.96068831197139e-05, |
|
"loss": 1.5765, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.12534273403838622, |
|
"grad_norm": 1.6130616664886475, |
|
"learning_rate": 9.938625865312251e-05, |
|
"loss": 1.6439, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13787700744222484, |
|
"grad_norm": 2.1686384677886963, |
|
"learning_rate": 9.911709846436641e-05, |
|
"loss": 1.688, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15041128084606345, |
|
"grad_norm": 2.926682472229004, |
|
"learning_rate": 9.879969687616027e-05, |
|
"loss": 1.7303, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.16294555424990206, |
|
"grad_norm": 1.3535600900650024, |
|
"learning_rate": 9.84344009624807e-05, |
|
"loss": 1.8476, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1754798276537407, |
|
"grad_norm": 0.5178162455558777, |
|
"learning_rate": 9.80216101690461e-05, |
|
"loss": 1.2019, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.18801410105757932, |
|
"grad_norm": 0.5845542550086975, |
|
"learning_rate": 9.756177587652856e-05, |
|
"loss": 1.2685, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.20054837446141793, |
|
"grad_norm": 0.3702237606048584, |
|
"learning_rate": 9.705540090697575e-05, |
|
"loss": 1.1359, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.21308264786525657, |
|
"grad_norm": 0.3851359486579895, |
|
"learning_rate": 9.650303897398232e-05, |
|
"loss": 1.1106, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2256169212690952, |
|
"grad_norm": 0.44309094548225403, |
|
"learning_rate": 9.590529407721231e-05, |
|
"loss": 1.0739, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2381511946729338, |
|
"grad_norm": 0.4140450060367584, |
|
"learning_rate": 9.526281984193436e-05, |
|
"loss": 1.0593, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.25068546807677244, |
|
"grad_norm": 0.4120848476886749, |
|
"learning_rate": 9.4576318804292e-05, |
|
"loss": 1.0905, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26321974148061106, |
|
"grad_norm": 0.5593251585960388, |
|
"learning_rate": 9.384654164309083e-05, |
|
"loss": 1.164, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.27575401488444967, |
|
"grad_norm": 0.47133612632751465, |
|
"learning_rate": 9.30742863589421e-05, |
|
"loss": 1.1828, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2882882882882883, |
|
"grad_norm": 0.8377242088317871, |
|
"learning_rate": 9.226039740166091e-05, |
|
"loss": 1.3158, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3008225616921269, |
|
"grad_norm": 0.758277416229248, |
|
"learning_rate": 9.140576474687264e-05, |
|
"loss": 1.3127, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.3133568350959655, |
|
"grad_norm": 1.3909739255905151, |
|
"learning_rate": 9.051132292283771e-05, |
|
"loss": 1.5423, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3133568350959655, |
|
"eval_loss": 1.1302893161773682, |
|
"eval_runtime": 0.5032, |
|
"eval_samples_per_second": 99.369, |
|
"eval_steps_per_second": 25.836, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3258911084998041, |
|
"grad_norm": 0.39622560143470764, |
|
"learning_rate": 8.957804998855866e-05, |
|
"loss": 1.2607, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3384253819036428, |
|
"grad_norm": 0.30300581455230713, |
|
"learning_rate": 8.860696646428693e-05, |
|
"loss": 1.158, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3509596553074814, |
|
"grad_norm": 0.2985769510269165, |
|
"learning_rate": 8.759913421559902e-05, |
|
"loss": 1.2553, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.36349392871132, |
|
"grad_norm": 0.23289650678634644, |
|
"learning_rate": 8.655565529226198e-05, |
|
"loss": 1.0957, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.37602820211515864, |
|
"grad_norm": 0.28858616948127747, |
|
"learning_rate": 8.547767072315835e-05, |
|
"loss": 1.0659, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.38856247551899725, |
|
"grad_norm": 0.32212552428245544, |
|
"learning_rate": 8.436635926858759e-05, |
|
"loss": 1.0239, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.40109674892283587, |
|
"grad_norm": 0.35565420985221863, |
|
"learning_rate": 8.322293613130917e-05, |
|
"loss": 0.9998, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.4136310223266745, |
|
"grad_norm": 0.3142378628253937, |
|
"learning_rate": 8.204865162773613e-05, |
|
"loss": 1.0932, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.42616529573051315, |
|
"grad_norm": 0.3126344382762909, |
|
"learning_rate": 8.084478982073247e-05, |
|
"loss": 1.0905, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.43869956913435176, |
|
"grad_norm": 0.3326547145843506, |
|
"learning_rate": 7.961266711550922e-05, |
|
"loss": 1.1614, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.4512338425381904, |
|
"grad_norm": 0.5252828001976013, |
|
"learning_rate": 7.835363082015468e-05, |
|
"loss": 1.2527, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.463768115942029, |
|
"grad_norm": 0.6218559741973877, |
|
"learning_rate": 7.706905767237288e-05, |
|
"loss": 1.2688, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.4763023893458676, |
|
"grad_norm": 0.8121174573898315, |
|
"learning_rate": 7.576035233404096e-05, |
|
"loss": 1.4308, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.4888366627497062, |
|
"grad_norm": 0.3257191479206085, |
|
"learning_rate": 7.442894585523218e-05, |
|
"loss": 1.1662, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5013709361535449, |
|
"grad_norm": 0.3431564271450043, |
|
"learning_rate": 7.307629410938363e-05, |
|
"loss": 1.1489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5139052095573835, |
|
"grad_norm": 0.3011854290962219, |
|
"learning_rate": 7.170387620131993e-05, |
|
"loss": 1.1174, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.5264394829612221, |
|
"grad_norm": 0.25172197818756104, |
|
"learning_rate": 7.031319284987394e-05, |
|
"loss": 1.0787, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5389737563650607, |
|
"grad_norm": 0.198319673538208, |
|
"learning_rate": 6.890576474687263e-05, |
|
"loss": 0.9878, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5515080297688993, |
|
"grad_norm": 0.2211153209209442, |
|
"learning_rate": 6.7483130894283e-05, |
|
"loss": 0.9592, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.564042303172738, |
|
"grad_norm": 0.2352219521999359, |
|
"learning_rate": 6.604684692133597e-05, |
|
"loss": 1.0132, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5765765765765766, |
|
"grad_norm": 0.2619933485984802, |
|
"learning_rate": 6.459848338346861e-05, |
|
"loss": 1.0471, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.5891108499804152, |
|
"grad_norm": 0.3275744616985321, |
|
"learning_rate": 6.313962404494496e-05, |
|
"loss": 1.1015, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6016451233842538, |
|
"grad_norm": 0.4369727075099945, |
|
"learning_rate": 6.167186414703289e-05, |
|
"loss": 1.1958, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6141793967880924, |
|
"grad_norm": 0.592064380645752, |
|
"learning_rate": 6.019680866363139e-05, |
|
"loss": 1.2309, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.626713670191931, |
|
"grad_norm": 0.8621000051498413, |
|
"learning_rate": 5.8716070546254966e-05, |
|
"loss": 1.2946, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.626713670191931, |
|
"eval_loss": 1.0444798469543457, |
|
"eval_runtime": 0.5038, |
|
"eval_samples_per_second": 99.243, |
|
"eval_steps_per_second": 25.803, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6392479435957696, |
|
"grad_norm": 0.5975894331932068, |
|
"learning_rate": 5.7231268960295e-05, |
|
"loss": 1.541, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.6517822169996083, |
|
"grad_norm": 0.15483112633228302, |
|
"learning_rate": 5.574402751448614e-05, |
|
"loss": 1.1529, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.664316490403447, |
|
"grad_norm": 0.18394102156162262, |
|
"learning_rate": 5.425597248551387e-05, |
|
"loss": 1.0972, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.6768507638072856, |
|
"grad_norm": 0.19094309210777283, |
|
"learning_rate": 5.2768731039705e-05, |
|
"loss": 1.0735, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.6893850372111242, |
|
"grad_norm": 0.20925824344158173, |
|
"learning_rate": 5.128392945374505e-05, |
|
"loss": 1.014, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7019193106149628, |
|
"grad_norm": 0.1995202600955963, |
|
"learning_rate": 4.980319133636863e-05, |
|
"loss": 0.9662, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7144535840188014, |
|
"grad_norm": 0.23315928876399994, |
|
"learning_rate": 4.83281358529671e-05, |
|
"loss": 0.9831, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.72698785742264, |
|
"grad_norm": 0.24936404824256897, |
|
"learning_rate": 4.686037595505507e-05, |
|
"loss": 1.0317, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7395221308264787, |
|
"grad_norm": 0.3004598915576935, |
|
"learning_rate": 4.54015166165314e-05, |
|
"loss": 1.0879, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.7520564042303173, |
|
"grad_norm": 0.3489072620868683, |
|
"learning_rate": 4.395315307866405e-05, |
|
"loss": 1.0994, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7645906776341559, |
|
"grad_norm": 0.4188476800918579, |
|
"learning_rate": 4.2516869105717004e-05, |
|
"loss": 1.1941, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.7771249510379945, |
|
"grad_norm": 0.589253306388855, |
|
"learning_rate": 4.109423525312738e-05, |
|
"loss": 1.2446, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.7896592244418331, |
|
"grad_norm": 1.2236573696136475, |
|
"learning_rate": 3.968680715012606e-05, |
|
"loss": 1.8354, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.8021934978456717, |
|
"grad_norm": 0.1607893407344818, |
|
"learning_rate": 3.829612379868006e-05, |
|
"loss": 1.1968, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.8147277712495103, |
|
"grad_norm": 0.15359888970851898, |
|
"learning_rate": 3.692370589061639e-05, |
|
"loss": 1.15, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.827262044653349, |
|
"grad_norm": 0.17053017020225525, |
|
"learning_rate": 3.557105414476782e-05, |
|
"loss": 1.0514, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.8397963180571876, |
|
"grad_norm": 0.17347736656665802, |
|
"learning_rate": 3.423964766595906e-05, |
|
"loss": 1.0406, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.8523305914610263, |
|
"grad_norm": 0.19352148473262787, |
|
"learning_rate": 3.293094232762715e-05, |
|
"loss": 0.9618, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.8648648648648649, |
|
"grad_norm": 0.21194122731685638, |
|
"learning_rate": 3.164636917984534e-05, |
|
"loss": 0.9632, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.8773991382687035, |
|
"grad_norm": 0.22881725430488586, |
|
"learning_rate": 3.0387332884490805e-05, |
|
"loss": 0.9874, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8899334116725421, |
|
"grad_norm": 0.24773931503295898, |
|
"learning_rate": 2.9155210179267546e-05, |
|
"loss": 1.0175, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.9024676850763808, |
|
"grad_norm": 0.315243124961853, |
|
"learning_rate": 2.7951348372263875e-05, |
|
"loss": 1.1039, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.9150019584802194, |
|
"grad_norm": 0.3527504503726959, |
|
"learning_rate": 2.677706386869083e-05, |
|
"loss": 1.0922, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.927536231884058, |
|
"grad_norm": 0.5069158673286438, |
|
"learning_rate": 2.5633640731412412e-05, |
|
"loss": 1.1989, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.9400705052878966, |
|
"grad_norm": 0.89546799659729, |
|
"learning_rate": 2.4522329276841663e-05, |
|
"loss": 1.2376, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9400705052878966, |
|
"eval_loss": 1.0211966037750244, |
|
"eval_runtime": 0.5041, |
|
"eval_samples_per_second": 99.195, |
|
"eval_steps_per_second": 25.791, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9526047786917352, |
|
"grad_norm": 0.4782760441303253, |
|
"learning_rate": 2.3444344707738015e-05, |
|
"loss": 1.4443, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.9651390520955738, |
|
"grad_norm": 0.17578525841236115, |
|
"learning_rate": 2.2400865784401e-05, |
|
"loss": 1.0353, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.9776733254994124, |
|
"grad_norm": 0.19925934076309204, |
|
"learning_rate": 2.1393033535713093e-05, |
|
"loss": 0.9825, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.990207598903251, |
|
"grad_norm": 0.321463018655777, |
|
"learning_rate": 2.0421950011441354e-05, |
|
"loss": 1.1059, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.0113591852722288, |
|
"grad_norm": 1.0701099634170532, |
|
"learning_rate": 1.9488677077162295e-05, |
|
"loss": 2.3199, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0238934586760673, |
|
"grad_norm": 0.20064501464366913, |
|
"learning_rate": 1.8594235253127375e-05, |
|
"loss": 1.193, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.036427732079906, |
|
"grad_norm": 0.16497324407100677, |
|
"learning_rate": 1.77396025983391e-05, |
|
"loss": 1.0823, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.0489620054837445, |
|
"grad_norm": 0.1805717796087265, |
|
"learning_rate": 1.6925713641057904e-05, |
|
"loss": 1.0458, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.0614962788875832, |
|
"grad_norm": 0.17193178832530975, |
|
"learning_rate": 1.6153458356909176e-05, |
|
"loss": 0.9725, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.074030552291422, |
|
"grad_norm": 0.18326754868030548, |
|
"learning_rate": 1.5423681195707997e-05, |
|
"loss": 0.9722, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.0865648256952605, |
|
"grad_norm": 0.20325006544589996, |
|
"learning_rate": 1.4737180158065644e-05, |
|
"loss": 0.9446, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.0990990990990992, |
|
"grad_norm": 0.23397134244441986, |
|
"learning_rate": 1.4094705922787687e-05, |
|
"loss": 0.9672, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.1116333725029377, |
|
"grad_norm": 0.2701351046562195, |
|
"learning_rate": 1.3496961026017687e-05, |
|
"loss": 1.0053, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.1241676459067764, |
|
"grad_norm": 0.3563973307609558, |
|
"learning_rate": 1.2944599093024267e-05, |
|
"loss": 1.1253, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.136701919310615, |
|
"grad_norm": 0.3762572705745697, |
|
"learning_rate": 1.2438224123471442e-05, |
|
"loss": 1.1212, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1492361927144537, |
|
"grad_norm": 0.4805110991001129, |
|
"learning_rate": 1.1978389830953907e-05, |
|
"loss": 1.1656, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.1617704661182922, |
|
"grad_norm": 0.9201070070266724, |
|
"learning_rate": 1.1565599037519316e-05, |
|
"loss": 1.5552, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.1743047395221309, |
|
"grad_norm": 0.24048174917697906, |
|
"learning_rate": 1.1200303123839742e-05, |
|
"loss": 1.2953, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.1868390129259694, |
|
"grad_norm": 0.159620463848114, |
|
"learning_rate": 1.088290153563358e-05, |
|
"loss": 1.1379, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.199373286329808, |
|
"grad_norm": 0.17821361124515533, |
|
"learning_rate": 1.0613741346877497e-05, |
|
"loss": 1.0653, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.2119075597336466, |
|
"grad_norm": 0.18576368689537048, |
|
"learning_rate": 1.0393116880286118e-05, |
|
"loss": 0.9979, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.2244418331374853, |
|
"grad_norm": 0.17474296689033508, |
|
"learning_rate": 1.0221269385474488e-05, |
|
"loss": 0.95, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.2369761065413238, |
|
"grad_norm": 0.18885326385498047, |
|
"learning_rate": 1.0098386775155147e-05, |
|
"loss": 0.9432, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.2495103799451626, |
|
"grad_norm": 0.21814164519309998, |
|
"learning_rate": 1.0024603419658329e-05, |
|
"loss": 0.9849, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.262044653349001, |
|
"grad_norm": 0.2603691816329956, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0749, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.262044653349001, |
|
"eval_loss": 1.0146315097808838, |
|
"eval_runtime": 0.5028, |
|
"eval_samples_per_second": 99.449, |
|
"eval_steps_per_second": 25.857, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6898819529847603e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|