{
  "best_metric": 0.9274308681488037,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 3.0,
  "eval_steps": 50,
  "global_step": 141,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02127659574468085,
      "grad_norm": 13.848388671875,
      "learning_rate": 1.02e-05,
      "loss": 4.4616,
      "step": 1
    },
    {
      "epoch": 0.02127659574468085,
      "eval_loss": 3.8836147785186768,
      "eval_runtime": 3.5795,
      "eval_samples_per_second": 176.841,
      "eval_steps_per_second": 5.587,
      "step": 1
    },
    {
      "epoch": 0.0425531914893617,
      "grad_norm": 6.909480094909668,
      "learning_rate": 2.04e-05,
      "loss": 3.8433,
      "step": 2
    },
    {
      "epoch": 0.06382978723404255,
      "grad_norm": 5.8276777267456055,
      "learning_rate": 3.06e-05,
      "loss": 3.5629,
      "step": 3
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 5.752169132232666,
      "learning_rate": 4.08e-05,
      "loss": 3.5849,
      "step": 4
    },
    {
      "epoch": 0.10638297872340426,
      "grad_norm": 6.22681999206543,
      "learning_rate": 5.1e-05,
      "loss": 3.1647,
      "step": 5
    },
    {
      "epoch": 0.1276595744680851,
      "grad_norm": 9.792737007141113,
      "learning_rate": 6.12e-05,
      "loss": 2.7336,
      "step": 6
    },
    {
      "epoch": 0.14893617021276595,
      "grad_norm": 8.102272033691406,
      "learning_rate": 7.14e-05,
      "loss": 2.2142,
      "step": 7
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 5.091486930847168,
      "learning_rate": 8.16e-05,
      "loss": 1.9189,
      "step": 8
    },
    {
      "epoch": 0.19148936170212766,
      "grad_norm": 2.87353515625,
      "learning_rate": 9.18e-05,
      "loss": 1.9533,
      "step": 9
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 2.3648200035095215,
      "learning_rate": 0.000102,
      "loss": 1.5369,
      "step": 10
    },
    {
      "epoch": 0.23404255319148937,
      "grad_norm": 1.6724998950958252,
      "learning_rate": 0.00010198533518731099,
      "loss": 1.4629,
      "step": 11
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 1.2147119045257568,
      "learning_rate": 0.0001019413491828413,
      "loss": 1.1376,
      "step": 12
    },
    {
      "epoch": 0.2765957446808511,
      "grad_norm": 7.048776626586914,
      "learning_rate": 0.00010186806728253272,
      "loss": 0.8492,
      "step": 13
    },
    {
      "epoch": 0.2978723404255319,
      "grad_norm": 5.654258728027344,
      "learning_rate": 0.00010176553163012415,
      "loss": 0.5646,
      "step": 14
    },
    {
      "epoch": 0.3191489361702128,
      "grad_norm": 4.756352424621582,
      "learning_rate": 0.00010163380119291505,
      "loss": 1.6847,
      "step": 15
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 1.8449368476867676,
      "learning_rate": 0.00010147295172785395,
      "loss": 1.4681,
      "step": 16
    },
    {
      "epoch": 0.3617021276595745,
      "grad_norm": 0.6776601672172546,
      "learning_rate": 0.00010128307573797129,
      "loss": 1.244,
      "step": 17
    },
    {
      "epoch": 0.3829787234042553,
      "grad_norm": 0.5467591285705566,
      "learning_rate": 0.00010106428241918177,
      "loss": 1.1554,
      "step": 18
    },
    {
      "epoch": 0.40425531914893614,
      "grad_norm": 0.8386530876159668,
      "learning_rate": 0.00010081669759748692,
      "loss": 0.7783,
      "step": 19
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.9994120597839355,
      "learning_rate": 0.00010054046365661356,
      "loss": 0.1474,
      "step": 20
    },
    {
      "epoch": 0.44680851063829785,
      "grad_norm": 0.9148034453392029,
      "learning_rate": 0.00010023573945613038,
      "loss": 1.3979,
      "step": 21
    },
    {
      "epoch": 0.46808510638297873,
      "grad_norm": 0.6885150074958801,
      "learning_rate": 9.99027002400892e-05,
      "loss": 1.4452,
      "step": 22
    },
    {
      "epoch": 0.48936170212765956,
      "grad_norm": 0.5044928193092346,
      "learning_rate": 9.954153753624383e-05,
      "loss": 1.2974,
      "step": 23
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 0.517201840877533,
      "learning_rate": 9.915245904590414e-05,
      "loss": 1.2173,
      "step": 24
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 0.7127615809440613,
      "learning_rate": 9.873568852448903e-05,
      "loss": 1.0097,
      "step": 25
    },
    {
      "epoch": 0.5531914893617021,
      "grad_norm": 1.4452153444290161,
      "learning_rate": 9.829146565284679e-05,
      "loss": 0.0585,
      "step": 26
    },
    {
      "epoch": 0.574468085106383,
      "grad_norm": 0.6424712538719177,
      "learning_rate": 9.782004589941682e-05,
      "loss": 1.1097,
      "step": 27
    },
    {
      "epoch": 0.5957446808510638,
      "grad_norm": 0.4880043864250183,
      "learning_rate": 9.732170037331209e-05,
      "loss": 1.4546,
      "step": 28
    },
    {
      "epoch": 0.6170212765957447,
      "grad_norm": 0.4043918251991272,
      "learning_rate": 9.679671566840698e-05,
      "loss": 1.2941,
      "step": 29
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 0.4223072826862335,
      "learning_rate": 9.624539369851954e-05,
      "loss": 1.14,
      "step": 30
    },
    {
      "epoch": 0.6595744680851063,
      "grad_norm": 0.5029892325401306,
      "learning_rate": 9.566805152378394e-05,
      "loss": 0.9826,
      "step": 31
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 0.5004958510398865,
      "learning_rate": 9.50650211683119e-05,
      "loss": 0.3189,
      "step": 32
    },
    {
      "epoch": 0.7021276595744681,
      "grad_norm": 0.46429964900016785,
      "learning_rate": 9.443664942924885e-05,
      "loss": 0.9003,
      "step": 33
    },
    {
      "epoch": 0.723404255319149,
      "grad_norm": 0.5286682844161987,
      "learning_rate": 9.378329767733415e-05,
      "loss": 1.4447,
      "step": 34
    },
    {
      "epoch": 0.7446808510638298,
      "grad_norm": 0.4269276559352875,
      "learning_rate": 9.310534164908e-05,
      "loss": 1.3137,
      "step": 35
    },
    {
      "epoch": 0.7659574468085106,
      "grad_norm": 0.370991587638855,
      "learning_rate": 9.240317123068899e-05,
      "loss": 1.1297,
      "step": 36
    },
    {
      "epoch": 0.7872340425531915,
      "grad_norm": 0.4747011065483093,
      "learning_rate": 9.167719023383408e-05,
      "loss": 1.0179,
      "step": 37
    },
    {
      "epoch": 0.8085106382978723,
      "grad_norm": 0.4905516803264618,
      "learning_rate": 9.09278161634304e-05,
      "loss": 0.4583,
      "step": 38
    },
    {
      "epoch": 0.8297872340425532,
      "grad_norm": 0.43988707661628723,
      "learning_rate": 9.015547997753193e-05,
      "loss": 0.6616,
      "step": 39
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.45796748995780945,
      "learning_rate": 8.936062583949154e-05,
      "loss": 1.4275,
      "step": 40
    },
    {
      "epoch": 0.8723404255319149,
      "grad_norm": 0.36554864048957825,
      "learning_rate": 8.854371086252688e-05,
      "loss": 1.2779,
      "step": 41
    },
    {
      "epoch": 0.8936170212765957,
      "grad_norm": 0.3677642345428467,
      "learning_rate": 8.770520484683873e-05,
      "loss": 1.1917,
      "step": 42
    },
    {
      "epoch": 0.9148936170212766,
      "grad_norm": 0.35996830463409424,
      "learning_rate": 8.68455900094333e-05,
      "loss": 1.0847,
      "step": 43
    },
    {
      "epoch": 0.9361702127659575,
      "grad_norm": 0.4477192163467407,
      "learning_rate": 8.596536070680378e-05,
      "loss": 0.7112,
      "step": 44
    },
    {
      "epoch": 0.9574468085106383,
      "grad_norm": 0.4123137891292572,
      "learning_rate": 8.506502315063037e-05,
      "loss": 0.9964,
      "step": 45
    },
    {
      "epoch": 0.9787234042553191,
      "grad_norm": 0.358024001121521,
      "learning_rate": 8.414509511666283e-05,
      "loss": 1.2649,
      "step": 46
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.37631550431251526,
      "learning_rate": 8.320610564695234e-05,
      "loss": 0.9995,
      "step": 47
    },
    {
      "epoch": 1.0212765957446808,
      "grad_norm": 0.22452251613140106,
      "learning_rate": 8.224859474560443e-05,
      "loss": 0.1898,
      "step": 48
    },
    {
      "epoch": 1.0425531914893618,
      "grad_norm": 0.4229590594768524,
      "learning_rate": 8.127311306822753e-05,
      "loss": 1.1368,
      "step": 49
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 0.37615370750427246,
      "learning_rate": 8.028022160525618e-05,
      "loss": 1.3321,
      "step": 50
    },
    {
      "epoch": 1.0638297872340425,
      "eval_loss": 0.9616568088531494,
      "eval_runtime": 3.2062,
      "eval_samples_per_second": 197.427,
      "eval_steps_per_second": 6.238,
      "step": 50
    },
    {
      "epoch": 1.0851063829787233,
      "grad_norm": 0.32277727127075195,
      "learning_rate": 7.927049135933059e-05,
      "loss": 1.1556,
      "step": 51
    },
    {
      "epoch": 1.1063829787234043,
      "grad_norm": 0.4011160135269165,
      "learning_rate": 7.82445030169183e-05,
      "loss": 1.0646,
      "step": 52
    },
    {
      "epoch": 1.127659574468085,
      "grad_norm": 0.5565645098686218,
      "learning_rate": 7.720284661436687e-05,
      "loss": 0.8884,
      "step": 53
    },
    {
      "epoch": 1.148936170212766,
      "grad_norm": 0.30222636461257935,
      "learning_rate": 7.614612119857942e-05,
      "loss": 0.2506,
      "step": 54
    },
    {
      "epoch": 1.1702127659574468,
      "grad_norm": 0.3559470772743225,
      "learning_rate": 7.507493448250836e-05,
      "loss": 0.722,
      "step": 55
    },
    {
      "epoch": 1.1914893617021276,
      "grad_norm": 0.4607730805873871,
      "learning_rate": 7.398990249566532e-05,
      "loss": 1.3531,
      "step": 56
    },
    {
      "epoch": 1.2127659574468086,
      "grad_norm": 0.39202919602394104,
      "learning_rate": 7.289164922984824e-05,
      "loss": 1.2476,
      "step": 57
    },
    {
      "epoch": 1.2340425531914894,
      "grad_norm": 0.35366523265838623,
      "learning_rate": 7.178080628028965e-05,
      "loss": 1.1099,
      "step": 58
    },
    {
      "epoch": 1.2553191489361701,
      "grad_norm": 0.40545791387557983,
      "learning_rate": 7.065801248243196e-05,
      "loss": 0.9506,
      "step": 59
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 0.3662566542625427,
      "learning_rate": 6.952391354453924e-05,
      "loss": 0.4258,
      "step": 60
    },
    {
      "epoch": 1.297872340425532,
      "grad_norm": 0.28656280040740967,
      "learning_rate": 6.837916167635644e-05,
      "loss": 0.4806,
      "step": 61
    },
    {
      "epoch": 1.3191489361702127,
      "grad_norm": 0.47951042652130127,
      "learning_rate": 6.722441521402946e-05,
      "loss": 1.3409,
      "step": 62
    },
    {
      "epoch": 1.3404255319148937,
      "grad_norm": 0.4036313593387604,
      "learning_rate": 6.606033824150241e-05,
      "loss": 1.272,
      "step": 63
    },
    {
      "epoch": 1.3617021276595744,
      "grad_norm": 0.372051477432251,
      "learning_rate": 6.48876002086089e-05,
      "loss": 1.0842,
      "step": 64
    },
    {
      "epoch": 1.3829787234042552,
      "grad_norm": 0.4357682466506958,
      "learning_rate": 6.37068755460778e-05,
      "loss": 1.0105,
      "step": 65
    },
    {
      "epoch": 1.4042553191489362,
      "grad_norm": 0.5092247128486633,
      "learning_rate": 6.251884327767429e-05,
      "loss": 0.6371,
      "step": 66
    },
    {
      "epoch": 1.425531914893617,
      "grad_norm": 0.202036052942276,
      "learning_rate": 6.132418662969977e-05,
      "loss": 0.2384,
      "step": 67
    },
    {
      "epoch": 1.4468085106382977,
      "grad_norm": 0.4590073227882385,
      "learning_rate": 6.012359263807463e-05,
      "loss": 1.2431,
      "step": 68
    },
    {
      "epoch": 1.4680851063829787,
      "grad_norm": 0.395398885011673,
      "learning_rate": 5.891775175323035e-05,
      "loss": 1.2642,
      "step": 69
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 0.4025956690311432,
      "learning_rate": 5.770735744303787e-05,
      "loss": 1.1548,
      "step": 70
    },
    {
      "epoch": 1.5106382978723403,
      "grad_norm": 0.402270644903183,
      "learning_rate": 5.6493105794000665e-05,
      "loss": 1.0174,
      "step": 71
    },
    {
      "epoch": 1.5319148936170213,
      "grad_norm": 0.5136646032333374,
      "learning_rate": 5.52756951109419e-05,
      "loss": 0.8706,
      "step": 72
    },
    {
      "epoch": 1.5531914893617023,
      "grad_norm": 0.0806485190987587,
      "learning_rate": 5.405582551541579e-05,
      "loss": 0.0626,
      "step": 73
    },
    {
      "epoch": 1.574468085106383,
      "grad_norm": 0.42770785093307495,
      "learning_rate": 5.283419854307425e-05,
      "loss": 1.0383,
      "step": 74
    },
    {
      "epoch": 1.5957446808510638,
      "grad_norm": 0.4656476080417633,
      "learning_rate": 5.16115167402202e-05,
      "loss": 1.293,
      "step": 75
    },
    {
      "epoch": 1.6170212765957448,
      "grad_norm": 0.38593193888664246,
      "learning_rate": 5.0388483259779815e-05,
      "loss": 1.2081,
      "step": 76
    },
    {
      "epoch": 1.6382978723404256,
      "grad_norm": 0.3994680643081665,
      "learning_rate": 4.916580145692577e-05,
      "loss": 1.0481,
      "step": 77
    },
    {
      "epoch": 1.6595744680851063,
      "grad_norm": 0.506732702255249,
      "learning_rate": 4.794417448458422e-05,
      "loss": 0.8817,
      "step": 78
    },
    {
      "epoch": 1.6808510638297873,
      "grad_norm": 0.272098571062088,
      "learning_rate": 4.67243048890581e-05,
      "loss": 0.2088,
      "step": 79
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.35723525285720825,
      "learning_rate": 4.5506894205999334e-05,
      "loss": 0.6819,
      "step": 80
    },
    {
      "epoch": 1.7234042553191489,
      "grad_norm": 0.4605822265148163,
      "learning_rate": 4.429264255696214e-05,
      "loss": 1.3524,
      "step": 81
    },
    {
      "epoch": 1.7446808510638299,
      "grad_norm": 0.38222458958625793,
      "learning_rate": 4.308224824676965e-05,
      "loss": 1.1625,
      "step": 82
    },
    {
      "epoch": 1.7659574468085106,
      "grad_norm": 0.3701620399951935,
      "learning_rate": 4.187640736192537e-05,
      "loss": 1.064,
      "step": 83
    },
    {
      "epoch": 1.7872340425531914,
      "grad_norm": 0.4499792456626892,
      "learning_rate": 4.067581337030022e-05,
      "loss": 0.9158,
      "step": 84
    },
    {
      "epoch": 1.8085106382978724,
      "grad_norm": 0.3936365842819214,
      "learning_rate": 3.948115672232572e-05,
      "loss": 0.4121,
      "step": 85
    },
    {
      "epoch": 1.8297872340425532,
      "grad_norm": 0.27606216073036194,
      "learning_rate": 3.8293124453922226e-05,
      "loss": 0.4227,
      "step": 86
    },
    {
      "epoch": 1.851063829787234,
      "grad_norm": 0.48591378331184387,
      "learning_rate": 3.711239979139111e-05,
      "loss": 1.3174,
      "step": 87
    },
    {
      "epoch": 1.872340425531915,
      "grad_norm": 0.3974682092666626,
      "learning_rate": 3.593966175849759e-05,
      "loss": 1.2122,
      "step": 88
    },
    {
      "epoch": 1.8936170212765957,
      "grad_norm": 0.3816875219345093,
      "learning_rate": 3.477558478597054e-05,
      "loss": 1.1615,
      "step": 89
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 0.4059462547302246,
      "learning_rate": 3.362083832364357e-05,
      "loss": 0.9824,
      "step": 90
    },
    {
      "epoch": 1.9361702127659575,
      "grad_norm": 0.4912261366844177,
      "learning_rate": 3.247608645546074e-05,
      "loss": 0.683,
      "step": 91
    },
    {
      "epoch": 1.9574468085106385,
      "grad_norm": 0.4084428548812866,
      "learning_rate": 3.134198751756804e-05,
      "loss": 1.0533,
      "step": 92
    },
    {
      "epoch": 1.978723404255319,
      "grad_norm": 0.38703203201293945,
      "learning_rate": 3.0219193719710368e-05,
      "loss": 1.1633,
      "step": 93
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.4213656783103943,
      "learning_rate": 2.910835077015177e-05,
      "loss": 0.9266,
      "step": 94
    },
    {
      "epoch": 2.021276595744681,
      "grad_norm": 0.08342910557985306,
      "learning_rate": 2.8010097504334692e-05,
      "loss": 0.0614,
      "step": 95
    },
    {
      "epoch": 2.0425531914893615,
      "grad_norm": 0.46658873558044434,
      "learning_rate": 2.692506551749165e-05,
      "loss": 1.0568,
      "step": 96
    },
    {
      "epoch": 2.0638297872340425,
      "grad_norm": 0.4580381512641907,
      "learning_rate": 2.5853878801420582e-05,
      "loss": 1.2753,
      "step": 97
    },
    {
      "epoch": 2.0851063829787235,
      "grad_norm": 0.3629004955291748,
      "learning_rate": 2.4797153385633147e-05,
      "loss": 1.0902,
      "step": 98
    },
    {
      "epoch": 2.106382978723404,
      "grad_norm": 0.3911716938018799,
      "learning_rate": 2.3755496983081708e-05,
      "loss": 1.0178,
      "step": 99
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 0.475111186504364,
      "learning_rate": 2.2729508640669428e-05,
      "loss": 0.8545,
      "step": 100
    },
    {
      "epoch": 2.127659574468085,
      "eval_loss": 0.9274308681488037,
      "eval_runtime": 3.2361,
      "eval_samples_per_second": 195.604,
      "eval_steps_per_second": 6.18,
      "step": 100
    },
    {
      "epoch": 2.148936170212766,
      "grad_norm": 0.2774420976638794,
      "learning_rate": 2.1719778394743813e-05,
      "loss": 0.212,
      "step": 101
    },
    {
      "epoch": 2.1702127659574466,
      "grad_norm": 0.3814822733402252,
      "learning_rate": 2.0726886931772476e-05,
      "loss": 0.7554,
      "step": 102
    },
    {
      "epoch": 2.1914893617021276,
      "grad_norm": 0.423093318939209,
      "learning_rate": 1.9751405254395587e-05,
      "loss": 1.3008,
      "step": 103
    },
    {
      "epoch": 2.2127659574468086,
      "grad_norm": 0.38136741518974304,
      "learning_rate": 1.879389435304766e-05,
      "loss": 1.1388,
      "step": 104
    },
    {
      "epoch": 2.2340425531914896,
      "grad_norm": 0.39356493949890137,
      "learning_rate": 1.7854904883337184e-05,
      "loss": 0.9796,
      "step": 105
    },
    {
      "epoch": 2.25531914893617,
      "grad_norm": 0.47382405400276184,
      "learning_rate": 1.693497684936963e-05,
      "loss": 0.8731,
      "step": 106
    },
    {
      "epoch": 2.276595744680851,
      "grad_norm": 0.4495427906513214,
      "learning_rate": 1.6034639293196224e-05,
      "loss": 0.4196,
      "step": 107
    },
    {
      "epoch": 2.297872340425532,
      "grad_norm": 0.26799333095550537,
      "learning_rate": 1.515440999056669e-05,
      "loss": 0.338,
      "step": 108
    },
    {
      "epoch": 2.3191489361702127,
      "grad_norm": 0.4940812587738037,
      "learning_rate": 1.429479515316127e-05,
      "loss": 1.1992,
      "step": 109
    },
    {
      "epoch": 2.3404255319148937,
      "grad_norm": 0.4212472140789032,
      "learning_rate": 1.3456289137473124e-05,
      "loss": 1.153,
      "step": 110
    },
    {
      "epoch": 2.3617021276595747,
      "grad_norm": 0.4149324893951416,
      "learning_rate": 1.263937416050847e-05,
      "loss": 1.05,
      "step": 111
    },
    {
      "epoch": 2.382978723404255,
      "grad_norm": 0.4490218460559845,
      "learning_rate": 1.1844520022468092e-05,
      "loss": 0.9362,
      "step": 112
    },
    {
      "epoch": 2.404255319148936,
      "grad_norm": 0.5155778527259827,
      "learning_rate": 1.1072183836569599e-05,
      "loss": 0.615,
      "step": 113
    },
    {
      "epoch": 2.425531914893617,
      "grad_norm": 0.1996319442987442,
      "learning_rate": 1.0322809766165916e-05,
      "loss": 0.1577,
      "step": 114
    },
    {
      "epoch": 2.4468085106382977,
      "grad_norm": 0.5449104905128479,
      "learning_rate": 9.596828769311028e-06,
      "loss": 1.2252,
      "step": 115
    },
    {
      "epoch": 2.4680851063829787,
      "grad_norm": 0.4638878405094147,
      "learning_rate": 8.894658350919999e-06,
      "loss": 1.2128,
      "step": 116
    },
    {
      "epoch": 2.4893617021276597,
      "grad_norm": 0.4165002107620239,
      "learning_rate": 8.216702322665849e-06,
      "loss": 1.0802,
      "step": 117
    },
    {
      "epoch": 2.5106382978723403,
      "grad_norm": 0.42553117871284485,
      "learning_rate": 7.563350570751137e-06,
      "loss": 0.9043,
      "step": 118
    },
    {
      "epoch": 2.5319148936170213,
      "grad_norm": 0.5580489039421082,
      "learning_rate": 6.934978831688112e-06,
      "loss": 0.7406,
      "step": 119
    },
    {
      "epoch": 2.5531914893617023,
      "grad_norm": 0.14457367360591888,
      "learning_rate": 6.331948476216073e-06,
      "loss": 0.0878,
      "step": 120
    },
    {
      "epoch": 2.574468085106383,
      "grad_norm": 0.46198827028274536,
      "learning_rate": 5.754606301480452e-06,
      "loss": 1.0055,
      "step": 121
    },
    {
      "epoch": 2.595744680851064,
      "grad_norm": 0.4651603102684021,
      "learning_rate": 5.2032843315930305e-06,
      "loss": 1.2232,
      "step": 122
    },
    {
      "epoch": 2.617021276595745,
      "grad_norm": 0.41709625720977783,
      "learning_rate": 4.678299626687903e-06,
      "loss": 1.0644,
      "step": 123
    },
    {
      "epoch": 2.6382978723404253,
      "grad_norm": 0.42545390129089355,
      "learning_rate": 4.179954100583199e-06,
      "loss": 0.9916,
      "step": 124
    },
    {
      "epoch": 2.6595744680851063,
      "grad_norm": 0.5188893675804138,
      "learning_rate": 3.708534347153212e-06,
      "loss": 0.8641,
      "step": 125
    },
    {
      "epoch": 2.6808510638297873,
      "grad_norm": 0.30658212304115295,
      "learning_rate": 3.26431147551097e-06,
      "loss": 0.1864,
      "step": 126
    },
    {
      "epoch": 2.702127659574468,
      "grad_norm": 0.4011896252632141,
      "learning_rate": 2.8475409540958616e-06,
      "loss": 0.6371,
      "step": 127
    },
    {
      "epoch": 2.723404255319149,
      "grad_norm": 0.4772135615348816,
      "learning_rate": 2.45846246375617e-06,
      "loss": 1.2719,
      "step": 128
    },
    {
      "epoch": 2.74468085106383,
      "grad_norm": 0.44009700417518616,
      "learning_rate": 2.097299759910797e-06,
      "loss": 1.1568,
      "step": 129
    },
    {
      "epoch": 2.7659574468085104,
      "grad_norm": 0.42015552520751953,
      "learning_rate": 1.7642605438696306e-06,
      "loss": 1.0539,
      "step": 130
    },
    {
      "epoch": 2.7872340425531914,
      "grad_norm": 0.4982571303844452,
      "learning_rate": 1.4595363433864484e-06,
      "loss": 0.8517,
      "step": 131
    },
    {
      "epoch": 2.8085106382978724,
      "grad_norm": 0.4780231714248657,
      "learning_rate": 1.1833024025130858e-06,
      "loss": 0.3794,
      "step": 132
    },
    {
      "epoch": 2.829787234042553,
      "grad_norm": 0.3601129949092865,
      "learning_rate": 9.357175808182305e-07,
      "loss": 0.5229,
      "step": 133
    },
    {
      "epoch": 2.851063829787234,
      "grad_norm": 0.5078785419464111,
      "learning_rate": 7.169242620287227e-07,
      "loss": 1.2511,
      "step": 134
    },
    {
      "epoch": 2.872340425531915,
      "grad_norm": 0.42775991559028625,
      "learning_rate": 5.270482721460563e-07,
      "loss": 1.1546,
      "step": 135
    },
    {
      "epoch": 2.8936170212765955,
      "grad_norm": 0.43165627121925354,
      "learning_rate": 3.6619880708494724e-07,
      "loss": 1.0596,
      "step": 136
    },
    {
      "epoch": 2.9148936170212765,
      "grad_norm": 0.4631091356277466,
      "learning_rate": 2.3446836987585295e-07,
      "loss": 0.9146,
      "step": 137
    },
    {
      "epoch": 2.9361702127659575,
      "grad_norm": 0.5159528851509094,
      "learning_rate": 1.319327174672832e-07,
      "loss": 0.5763,
      "step": 138
    },
    {
      "epoch": 2.9574468085106385,
      "grad_norm": 0.426923006772995,
      "learning_rate": 5.865081715870424e-08,
      "loss": 0.7757,
      "step": 139
    },
    {
      "epoch": 2.978723404255319,
      "grad_norm": 0.4215574264526367,
      "learning_rate": 1.4664812689001438e-08,
      "loss": 1.14,
      "step": 140
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.5064499378204346,
      "learning_rate": 0.0,
      "loss": 0.8367,
      "step": 141
    }
  ],
  "logging_steps": 1,
  "max_steps": 141,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.466930952990884e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}