PEFT
Safetensors
Japanese
English
gemma-2-9b-4bit-magpie / trainer_state.json
mssfj's picture
Upload folder using huggingface_hub
d740e35 verified
raw
history blame
30 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.821256038647343,
"eval_steps": 500,
"global_step": 170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004830917874396135,
"grad_norm": 0.7645118236541748,
"learning_rate": 5e-06,
"loss": 1.259,
"step": 1
},
{
"epoch": 0.00966183574879227,
"grad_norm": 0.9345910549163818,
"learning_rate": 1e-05,
"loss": 1.4787,
"step": 2
},
{
"epoch": 0.014492753623188406,
"grad_norm": 0.9917318224906921,
"learning_rate": 1.5e-05,
"loss": 1.5453,
"step": 3
},
{
"epoch": 0.01932367149758454,
"grad_norm": 1.0239824056625366,
"learning_rate": 2e-05,
"loss": 1.5964,
"step": 4
},
{
"epoch": 0.024154589371980676,
"grad_norm": 0.9726951718330383,
"learning_rate": 2.5e-05,
"loss": 1.5687,
"step": 5
},
{
"epoch": 0.028985507246376812,
"grad_norm": 0.7599917650222778,
"learning_rate": 3e-05,
"loss": 1.5249,
"step": 6
},
{
"epoch": 0.033816425120772944,
"grad_norm": 0.5268093347549438,
"learning_rate": 3.5e-05,
"loss": 1.4637,
"step": 7
},
{
"epoch": 0.03864734299516908,
"grad_norm": 0.5739946365356445,
"learning_rate": 4e-05,
"loss": 1.4514,
"step": 8
},
{
"epoch": 0.043478260869565216,
"grad_norm": 0.6630675792694092,
"learning_rate": 4.5e-05,
"loss": 1.442,
"step": 9
},
{
"epoch": 0.04830917874396135,
"grad_norm": 0.5699703097343445,
"learning_rate": 5e-05,
"loss": 1.4091,
"step": 10
},
{
"epoch": 0.05314009661835749,
"grad_norm": 0.4952673017978668,
"learning_rate": 5.500000000000001e-05,
"loss": 1.3877,
"step": 11
},
{
"epoch": 0.057971014492753624,
"grad_norm": 0.5180989503860474,
"learning_rate": 6e-05,
"loss": 1.3707,
"step": 12
},
{
"epoch": 0.06280193236714976,
"grad_norm": 0.41192442178726196,
"learning_rate": 6.500000000000001e-05,
"loss": 1.3599,
"step": 13
},
{
"epoch": 0.06763285024154589,
"grad_norm": 0.28801196813583374,
"learning_rate": 7e-05,
"loss": 1.3484,
"step": 14
},
{
"epoch": 0.07246376811594203,
"grad_norm": 0.2618640959262848,
"learning_rate": 7.500000000000001e-05,
"loss": 1.3382,
"step": 15
},
{
"epoch": 0.07729468599033816,
"grad_norm": 0.2657703161239624,
"learning_rate": 8e-05,
"loss": 1.335,
"step": 16
},
{
"epoch": 0.0821256038647343,
"grad_norm": 0.2432931512594223,
"learning_rate": 8.5e-05,
"loss": 1.323,
"step": 17
},
{
"epoch": 0.08695652173913043,
"grad_norm": 0.24172987043857574,
"learning_rate": 9e-05,
"loss": 1.3317,
"step": 18
},
{
"epoch": 0.09178743961352658,
"grad_norm": 0.26086804270744324,
"learning_rate": 9.5e-05,
"loss": 1.3163,
"step": 19
},
{
"epoch": 0.0966183574879227,
"grad_norm": 0.2007642686367035,
"learning_rate": 0.0001,
"loss": 1.2877,
"step": 20
},
{
"epoch": 0.10144927536231885,
"grad_norm": 0.2327784299850464,
"learning_rate": 9.946524064171123e-05,
"loss": 1.2899,
"step": 21
},
{
"epoch": 0.10628019323671498,
"grad_norm": 0.20648740231990814,
"learning_rate": 9.893048128342246e-05,
"loss": 1.287,
"step": 22
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.22094646096229553,
"learning_rate": 9.83957219251337e-05,
"loss": 1.2873,
"step": 23
},
{
"epoch": 0.11594202898550725,
"grad_norm": 0.18131175637245178,
"learning_rate": 9.786096256684493e-05,
"loss": 1.2594,
"step": 24
},
{
"epoch": 0.12077294685990338,
"grad_norm": 0.16657911241054535,
"learning_rate": 9.732620320855615e-05,
"loss": 1.2639,
"step": 25
},
{
"epoch": 0.12560386473429952,
"grad_norm": 0.1740303933620453,
"learning_rate": 9.679144385026739e-05,
"loss": 1.2603,
"step": 26
},
{
"epoch": 0.13043478260869565,
"grad_norm": 0.15640808641910553,
"learning_rate": 9.625668449197861e-05,
"loss": 1.274,
"step": 27
},
{
"epoch": 0.13526570048309178,
"grad_norm": 0.16680403053760529,
"learning_rate": 9.572192513368984e-05,
"loss": 1.2724,
"step": 28
},
{
"epoch": 0.14009661835748793,
"grad_norm": 0.15997755527496338,
"learning_rate": 9.518716577540108e-05,
"loss": 1.2672,
"step": 29
},
{
"epoch": 0.14492753623188406,
"grad_norm": 0.15305301547050476,
"learning_rate": 9.46524064171123e-05,
"loss": 1.2718,
"step": 30
},
{
"epoch": 0.1497584541062802,
"grad_norm": 0.14839769899845123,
"learning_rate": 9.411764705882353e-05,
"loss": 1.2707,
"step": 31
},
{
"epoch": 0.15458937198067632,
"grad_norm": 0.14878958463668823,
"learning_rate": 9.358288770053476e-05,
"loss": 1.2648,
"step": 32
},
{
"epoch": 0.15942028985507245,
"grad_norm": 0.17154482007026672,
"learning_rate": 9.3048128342246e-05,
"loss": 1.2641,
"step": 33
},
{
"epoch": 0.1642512077294686,
"grad_norm": 0.1447138488292694,
"learning_rate": 9.251336898395723e-05,
"loss": 1.27,
"step": 34
},
{
"epoch": 0.16908212560386474,
"grad_norm": 0.1631896197795868,
"learning_rate": 9.197860962566846e-05,
"loss": 1.276,
"step": 35
},
{
"epoch": 0.17391304347826086,
"grad_norm": 0.14892889559268951,
"learning_rate": 9.144385026737968e-05,
"loss": 1.2747,
"step": 36
},
{
"epoch": 0.178743961352657,
"grad_norm": 0.1588708907365799,
"learning_rate": 9.090909090909092e-05,
"loss": 1.276,
"step": 37
},
{
"epoch": 0.18357487922705315,
"grad_norm": 0.151743546128273,
"learning_rate": 9.037433155080214e-05,
"loss": 1.2788,
"step": 38
},
{
"epoch": 0.18840579710144928,
"grad_norm": 0.15703994035720825,
"learning_rate": 8.983957219251337e-05,
"loss": 1.2936,
"step": 39
},
{
"epoch": 0.1932367149758454,
"grad_norm": 0.1660437434911728,
"learning_rate": 8.930481283422461e-05,
"loss": 1.2824,
"step": 40
},
{
"epoch": 0.19806763285024154,
"grad_norm": 0.15268553793430328,
"learning_rate": 8.877005347593583e-05,
"loss": 1.3056,
"step": 41
},
{
"epoch": 0.2028985507246377,
"grad_norm": 0.1577601134777069,
"learning_rate": 8.823529411764706e-05,
"loss": 1.3142,
"step": 42
},
{
"epoch": 0.20772946859903382,
"grad_norm": 0.16757714748382568,
"learning_rate": 8.770053475935829e-05,
"loss": 1.3389,
"step": 43
},
{
"epoch": 0.21256038647342995,
"grad_norm": 0.1712018847465515,
"learning_rate": 8.716577540106952e-05,
"loss": 1.3437,
"step": 44
},
{
"epoch": 0.21739130434782608,
"grad_norm": 0.1829441487789154,
"learning_rate": 8.663101604278076e-05,
"loss": 1.358,
"step": 45
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.20615732669830322,
"learning_rate": 8.609625668449198e-05,
"loss": 1.3878,
"step": 46
},
{
"epoch": 0.22705314009661837,
"grad_norm": 0.23940807580947876,
"learning_rate": 8.556149732620321e-05,
"loss": 1.4699,
"step": 47
},
{
"epoch": 0.2318840579710145,
"grad_norm": 0.41468575596809387,
"learning_rate": 8.502673796791443e-05,
"loss": 1.4893,
"step": 48
},
{
"epoch": 0.23671497584541062,
"grad_norm": 0.5656126737594604,
"learning_rate": 8.449197860962568e-05,
"loss": 1.4967,
"step": 49
},
{
"epoch": 0.24154589371980675,
"grad_norm": 4.2125325202941895,
"learning_rate": 8.39572192513369e-05,
"loss": 1.6295,
"step": 50
},
{
"epoch": 0.2463768115942029,
"grad_norm": 1.100631833076477,
"learning_rate": 8.342245989304814e-05,
"loss": 1.0523,
"step": 51
},
{
"epoch": 0.25120772946859904,
"grad_norm": 0.44898825883865356,
"learning_rate": 8.288770053475936e-05,
"loss": 1.106,
"step": 52
},
{
"epoch": 0.2560386473429952,
"grad_norm": 0.2860921025276184,
"learning_rate": 8.23529411764706e-05,
"loss": 1.1398,
"step": 53
},
{
"epoch": 0.2608695652173913,
"grad_norm": 0.28824105858802795,
"learning_rate": 8.181818181818183e-05,
"loss": 1.1547,
"step": 54
},
{
"epoch": 0.26570048309178745,
"grad_norm": 0.32123416662216187,
"learning_rate": 8.128342245989305e-05,
"loss": 1.1646,
"step": 55
},
{
"epoch": 0.27053140096618356,
"grad_norm": 0.2752850353717804,
"learning_rate": 8.074866310160429e-05,
"loss": 1.1752,
"step": 56
},
{
"epoch": 0.2753623188405797,
"grad_norm": 0.22371803224086761,
"learning_rate": 8.021390374331551e-05,
"loss": 1.1934,
"step": 57
},
{
"epoch": 0.28019323671497587,
"grad_norm": 0.23126192390918732,
"learning_rate": 7.967914438502674e-05,
"loss": 1.2057,
"step": 58
},
{
"epoch": 0.28502415458937197,
"grad_norm": 0.24694480001926422,
"learning_rate": 7.914438502673798e-05,
"loss": 1.1858,
"step": 59
},
{
"epoch": 0.2898550724637681,
"grad_norm": 0.20105589926242828,
"learning_rate": 7.86096256684492e-05,
"loss": 1.202,
"step": 60
},
{
"epoch": 0.2946859903381642,
"grad_norm": 0.15975232422351837,
"learning_rate": 7.807486631016043e-05,
"loss": 1.2014,
"step": 61
},
{
"epoch": 0.2995169082125604,
"grad_norm": 0.17269295454025269,
"learning_rate": 7.754010695187165e-05,
"loss": 1.1878,
"step": 62
},
{
"epoch": 0.30434782608695654,
"grad_norm": 0.1990584284067154,
"learning_rate": 7.700534759358289e-05,
"loss": 1.1951,
"step": 63
},
{
"epoch": 0.30917874396135264,
"grad_norm": 0.18062998354434967,
"learning_rate": 7.647058823529411e-05,
"loss": 1.1978,
"step": 64
},
{
"epoch": 0.3140096618357488,
"grad_norm": 0.15606802701950073,
"learning_rate": 7.593582887700536e-05,
"loss": 1.2153,
"step": 65
},
{
"epoch": 0.3188405797101449,
"grad_norm": 0.1434660404920578,
"learning_rate": 7.540106951871658e-05,
"loss": 1.2074,
"step": 66
},
{
"epoch": 0.32367149758454106,
"grad_norm": 0.1473468840122223,
"learning_rate": 7.486631016042782e-05,
"loss": 1.1962,
"step": 67
},
{
"epoch": 0.3285024154589372,
"grad_norm": 0.1452961415052414,
"learning_rate": 7.433155080213904e-05,
"loss": 1.209,
"step": 68
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.14672888815402985,
"learning_rate": 7.379679144385027e-05,
"loss": 1.2126,
"step": 69
},
{
"epoch": 0.33816425120772947,
"grad_norm": 0.14124266803264618,
"learning_rate": 7.326203208556151e-05,
"loss": 1.2111,
"step": 70
},
{
"epoch": 0.34299516908212563,
"grad_norm": 0.13139352202415466,
"learning_rate": 7.272727272727273e-05,
"loss": 1.216,
"step": 71
},
{
"epoch": 0.34782608695652173,
"grad_norm": 0.1385214626789093,
"learning_rate": 7.219251336898396e-05,
"loss": 1.2199,
"step": 72
},
{
"epoch": 0.3526570048309179,
"grad_norm": 0.1288975179195404,
"learning_rate": 7.165775401069518e-05,
"loss": 1.201,
"step": 73
},
{
"epoch": 0.357487922705314,
"grad_norm": 0.13003361225128174,
"learning_rate": 7.112299465240642e-05,
"loss": 1.2186,
"step": 74
},
{
"epoch": 0.36231884057971014,
"grad_norm": 0.13762855529785156,
"learning_rate": 7.058823529411765e-05,
"loss": 1.2209,
"step": 75
},
{
"epoch": 0.3671497584541063,
"grad_norm": 0.13935087621212006,
"learning_rate": 7.005347593582889e-05,
"loss": 1.2219,
"step": 76
},
{
"epoch": 0.3719806763285024,
"grad_norm": 0.13384683430194855,
"learning_rate": 6.951871657754011e-05,
"loss": 1.2419,
"step": 77
},
{
"epoch": 0.37681159420289856,
"grad_norm": 0.12453139573335648,
"learning_rate": 6.898395721925133e-05,
"loss": 1.2154,
"step": 78
},
{
"epoch": 0.38164251207729466,
"grad_norm": 0.13903535902500153,
"learning_rate": 6.844919786096257e-05,
"loss": 1.2378,
"step": 79
},
{
"epoch": 0.3864734299516908,
"grad_norm": 0.13833968341350555,
"learning_rate": 6.79144385026738e-05,
"loss": 1.2244,
"step": 80
},
{
"epoch": 0.391304347826087,
"grad_norm": 0.13052114844322205,
"learning_rate": 6.737967914438504e-05,
"loss": 1.226,
"step": 81
},
{
"epoch": 0.3961352657004831,
"grad_norm": 0.13437196612358093,
"learning_rate": 6.684491978609626e-05,
"loss": 1.2457,
"step": 82
},
{
"epoch": 0.40096618357487923,
"grad_norm": 0.13693881034851074,
"learning_rate": 6.631016042780749e-05,
"loss": 1.2384,
"step": 83
},
{
"epoch": 0.4057971014492754,
"grad_norm": 0.13426737487316132,
"learning_rate": 6.577540106951871e-05,
"loss": 1.2427,
"step": 84
},
{
"epoch": 0.4106280193236715,
"grad_norm": 0.13844414055347443,
"learning_rate": 6.524064171122995e-05,
"loss": 1.2661,
"step": 85
},
{
"epoch": 0.41545893719806765,
"grad_norm": 0.13957887887954712,
"learning_rate": 6.470588235294118e-05,
"loss": 1.2696,
"step": 86
},
{
"epoch": 0.42028985507246375,
"grad_norm": 0.13465899229049683,
"learning_rate": 6.41711229946524e-05,
"loss": 1.2591,
"step": 87
},
{
"epoch": 0.4251207729468599,
"grad_norm": 0.1441555917263031,
"learning_rate": 6.363636363636364e-05,
"loss": 1.2766,
"step": 88
},
{
"epoch": 0.42995169082125606,
"grad_norm": 0.1500505656003952,
"learning_rate": 6.310160427807486e-05,
"loss": 1.2887,
"step": 89
},
{
"epoch": 0.43478260869565216,
"grad_norm": 0.15137352049350739,
"learning_rate": 6.25668449197861e-05,
"loss": 1.2792,
"step": 90
},
{
"epoch": 0.4396135265700483,
"grad_norm": 0.15071454644203186,
"learning_rate": 6.203208556149733e-05,
"loss": 1.2876,
"step": 91
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.1570783108472824,
"learning_rate": 6.149732620320857e-05,
"loss": 1.3223,
"step": 92
},
{
"epoch": 0.4492753623188406,
"grad_norm": 0.16483648121356964,
"learning_rate": 6.096256684491979e-05,
"loss": 1.3285,
"step": 93
},
{
"epoch": 0.45410628019323673,
"grad_norm": 0.1700102537870407,
"learning_rate": 6.0427807486631016e-05,
"loss": 1.3349,
"step": 94
},
{
"epoch": 0.45893719806763283,
"grad_norm": 0.1778935194015503,
"learning_rate": 5.9893048128342244e-05,
"loss": 1.3644,
"step": 95
},
{
"epoch": 0.463768115942029,
"grad_norm": 0.19471201300621033,
"learning_rate": 5.9358288770053486e-05,
"loss": 1.4028,
"step": 96
},
{
"epoch": 0.46859903381642515,
"grad_norm": 0.25646305084228516,
"learning_rate": 5.882352941176471e-05,
"loss": 1.4694,
"step": 97
},
{
"epoch": 0.47342995169082125,
"grad_norm": 0.277474045753479,
"learning_rate": 5.8288770053475936e-05,
"loss": 1.5052,
"step": 98
},
{
"epoch": 0.4782608695652174,
"grad_norm": 0.33792170882225037,
"learning_rate": 5.7754010695187164e-05,
"loss": 1.5414,
"step": 99
},
{
"epoch": 0.4830917874396135,
"grad_norm": 0.6432341933250427,
"learning_rate": 5.721925133689839e-05,
"loss": 1.6046,
"step": 100
},
{
"epoch": 0.48792270531400966,
"grad_norm": 0.5846565365791321,
"learning_rate": 5.6684491978609634e-05,
"loss": 0.9549,
"step": 101
},
{
"epoch": 0.4927536231884058,
"grad_norm": 0.49914559721946716,
"learning_rate": 5.614973262032086e-05,
"loss": 1.0978,
"step": 102
},
{
"epoch": 0.4975845410628019,
"grad_norm": 0.33365777134895325,
"learning_rate": 5.561497326203209e-05,
"loss": 1.1242,
"step": 103
},
{
"epoch": 0.5024154589371981,
"grad_norm": 0.7763075828552246,
"learning_rate": 5.508021390374332e-05,
"loss": 1.1525,
"step": 104
},
{
"epoch": 0.5072463768115942,
"grad_norm": 0.2733679413795471,
"learning_rate": 5.4545454545454546e-05,
"loss": 1.1607,
"step": 105
},
{
"epoch": 0.5120772946859904,
"grad_norm": 0.26070019602775574,
"learning_rate": 5.401069518716578e-05,
"loss": 1.1609,
"step": 106
},
{
"epoch": 0.5169082125603864,
"grad_norm": 0.2708110809326172,
"learning_rate": 5.347593582887701e-05,
"loss": 1.1815,
"step": 107
},
{
"epoch": 0.5217391304347826,
"grad_norm": 0.26362475752830505,
"learning_rate": 5.294117647058824e-05,
"loss": 1.1943,
"step": 108
},
{
"epoch": 0.5265700483091788,
"grad_norm": 0.21209686994552612,
"learning_rate": 5.2406417112299466e-05,
"loss": 1.1944,
"step": 109
},
{
"epoch": 0.5314009661835749,
"grad_norm": 0.21895644068717957,
"learning_rate": 5.1871657754010694e-05,
"loss": 1.192,
"step": 110
},
{
"epoch": 0.5362318840579711,
"grad_norm": 0.20762521028518677,
"learning_rate": 5.1336898395721935e-05,
"loss": 1.1987,
"step": 111
},
{
"epoch": 0.5410628019323671,
"grad_norm": 0.18395845592021942,
"learning_rate": 5.0802139037433164e-05,
"loss": 1.1925,
"step": 112
},
{
"epoch": 0.5458937198067633,
"grad_norm": 0.1653558313846588,
"learning_rate": 5.026737967914439e-05,
"loss": 1.1961,
"step": 113
},
{
"epoch": 0.5507246376811594,
"grad_norm": 0.1628160923719406,
"learning_rate": 4.973262032085561e-05,
"loss": 1.1993,
"step": 114
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.17022235691547394,
"learning_rate": 4.919786096256685e-05,
"loss": 1.2101,
"step": 115
},
{
"epoch": 0.5603864734299517,
"grad_norm": 0.1710771918296814,
"learning_rate": 4.8663101604278076e-05,
"loss": 1.2059,
"step": 116
},
{
"epoch": 0.5652173913043478,
"grad_norm": 0.17160499095916748,
"learning_rate": 4.8128342245989304e-05,
"loss": 1.206,
"step": 117
},
{
"epoch": 0.5700483091787439,
"grad_norm": 0.15871083736419678,
"learning_rate": 4.759358288770054e-05,
"loss": 1.2067,
"step": 118
},
{
"epoch": 0.5748792270531401,
"grad_norm": 0.14880803227424622,
"learning_rate": 4.705882352941177e-05,
"loss": 1.218,
"step": 119
},
{
"epoch": 0.5797101449275363,
"grad_norm": 0.14413942396640778,
"learning_rate": 4.6524064171123e-05,
"loss": 1.1958,
"step": 120
},
{
"epoch": 0.5845410628019324,
"grad_norm": 0.14965958893299103,
"learning_rate": 4.598930481283423e-05,
"loss": 1.2206,
"step": 121
},
{
"epoch": 0.5893719806763285,
"grad_norm": 0.14546802639961243,
"learning_rate": 4.545454545454546e-05,
"loss": 1.2107,
"step": 122
},
{
"epoch": 0.5942028985507246,
"grad_norm": 0.14043672382831573,
"learning_rate": 4.491978609625669e-05,
"loss": 1.2059,
"step": 123
},
{
"epoch": 0.5990338164251208,
"grad_norm": 0.1403893083333969,
"learning_rate": 4.4385026737967915e-05,
"loss": 1.2327,
"step": 124
},
{
"epoch": 0.6038647342995169,
"grad_norm": 0.13266952335834503,
"learning_rate": 4.385026737967914e-05,
"loss": 1.194,
"step": 125
},
{
"epoch": 0.6086956521739131,
"grad_norm": 0.1347023993730545,
"learning_rate": 4.331550802139038e-05,
"loss": 1.1951,
"step": 126
},
{
"epoch": 0.6135265700483091,
"grad_norm": 0.13116984069347382,
"learning_rate": 4.2780748663101606e-05,
"loss": 1.2206,
"step": 127
},
{
"epoch": 0.6183574879227053,
"grad_norm": 0.14027127623558044,
"learning_rate": 4.224598930481284e-05,
"loss": 1.2304,
"step": 128
},
{
"epoch": 0.6231884057971014,
"grad_norm": 0.13990454375743866,
"learning_rate": 4.171122994652407e-05,
"loss": 1.2229,
"step": 129
},
{
"epoch": 0.6280193236714976,
"grad_norm": 0.13515028357505798,
"learning_rate": 4.11764705882353e-05,
"loss": 1.2191,
"step": 130
},
{
"epoch": 0.6328502415458938,
"grad_norm": 0.1329352706670761,
"learning_rate": 4.0641711229946525e-05,
"loss": 1.2308,
"step": 131
},
{
"epoch": 0.6376811594202898,
"grad_norm": 0.13061358034610748,
"learning_rate": 4.0106951871657754e-05,
"loss": 1.2446,
"step": 132
},
{
"epoch": 0.642512077294686,
"grad_norm": 0.13514551520347595,
"learning_rate": 3.957219251336899e-05,
"loss": 1.2425,
"step": 133
},
{
"epoch": 0.6473429951690821,
"grad_norm": 0.13962285220623016,
"learning_rate": 3.903743315508022e-05,
"loss": 1.2574,
"step": 134
},
{
"epoch": 0.6521739130434783,
"grad_norm": 0.14081381261348724,
"learning_rate": 3.8502673796791445e-05,
"loss": 1.247,
"step": 135
},
{
"epoch": 0.6570048309178744,
"grad_norm": 0.1396479606628418,
"learning_rate": 3.796791443850268e-05,
"loss": 1.2379,
"step": 136
},
{
"epoch": 0.6618357487922706,
"grad_norm": 0.13990682363510132,
"learning_rate": 3.743315508021391e-05,
"loss": 1.2637,
"step": 137
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.1405869573354721,
"learning_rate": 3.6898395721925136e-05,
"loss": 1.2458,
"step": 138
},
{
"epoch": 0.6714975845410628,
"grad_norm": 0.14569957554340363,
"learning_rate": 3.6363636363636364e-05,
"loss": 1.2755,
"step": 139
},
{
"epoch": 0.6763285024154589,
"grad_norm": 0.14868015050888062,
"learning_rate": 3.582887700534759e-05,
"loss": 1.2836,
"step": 140
},
{
"epoch": 0.6811594202898551,
"grad_norm": 0.1531868726015091,
"learning_rate": 3.529411764705883e-05,
"loss": 1.295,
"step": 141
},
{
"epoch": 0.6859903381642513,
"grad_norm": 0.16108393669128418,
"learning_rate": 3.4759358288770055e-05,
"loss": 1.3239,
"step": 142
},
{
"epoch": 0.6908212560386473,
"grad_norm": 0.1609143316745758,
"learning_rate": 3.4224598930481284e-05,
"loss": 1.3301,
"step": 143
},
{
"epoch": 0.6956521739130435,
"grad_norm": 0.16705213487148285,
"learning_rate": 3.368983957219252e-05,
"loss": 1.3484,
"step": 144
},
{
"epoch": 0.7004830917874396,
"grad_norm": 0.18058659136295319,
"learning_rate": 3.3155080213903747e-05,
"loss": 1.3794,
"step": 145
},
{
"epoch": 0.7053140096618358,
"grad_norm": 0.20221418142318726,
"learning_rate": 3.2620320855614975e-05,
"loss": 1.4019,
"step": 146
},
{
"epoch": 0.7101449275362319,
"grad_norm": 0.24968379735946655,
"learning_rate": 3.20855614973262e-05,
"loss": 1.4674,
"step": 147
},
{
"epoch": 0.714975845410628,
"grad_norm": 0.3043461740016937,
"learning_rate": 3.155080213903743e-05,
"loss": 1.4972,
"step": 148
},
{
"epoch": 0.7198067632850241,
"grad_norm": 0.33808770775794983,
"learning_rate": 3.1016042780748666e-05,
"loss": 1.5237,
"step": 149
},
{
"epoch": 0.7246376811594203,
"grad_norm": 0.5125290155410767,
"learning_rate": 3.0481283422459894e-05,
"loss": 1.5533,
"step": 150
},
{
"epoch": 0.7294685990338164,
"grad_norm": 0.25133025646209717,
"learning_rate": 2.9946524064171122e-05,
"loss": 0.8978,
"step": 151
},
{
"epoch": 0.7342995169082126,
"grad_norm": 0.30135515332221985,
"learning_rate": 2.9411764705882354e-05,
"loss": 1.0278,
"step": 152
},
{
"epoch": 0.7391304347826086,
"grad_norm": 0.2961145043373108,
"learning_rate": 2.8877005347593582e-05,
"loss": 1.1024,
"step": 153
},
{
"epoch": 0.7439613526570048,
"grad_norm": 0.273294597864151,
"learning_rate": 2.8342245989304817e-05,
"loss": 1.1373,
"step": 154
},
{
"epoch": 0.748792270531401,
"grad_norm": 0.23799936473369598,
"learning_rate": 2.7807486631016045e-05,
"loss": 1.1419,
"step": 155
},
{
"epoch": 0.7536231884057971,
"grad_norm": 0.21061824262142181,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.1512,
"step": 156
},
{
"epoch": 0.7584541062801933,
"grad_norm": 0.21470795571804047,
"learning_rate": 2.6737967914438505e-05,
"loss": 1.1616,
"step": 157
},
{
"epoch": 0.7632850241545893,
"grad_norm": 0.21231332421302795,
"learning_rate": 2.6203208556149733e-05,
"loss": 1.1662,
"step": 158
},
{
"epoch": 0.7681159420289855,
"grad_norm": 0.20699279010295868,
"learning_rate": 2.5668449197860968e-05,
"loss": 1.1716,
"step": 159
},
{
"epoch": 0.7729468599033816,
"grad_norm": 0.20150579512119293,
"learning_rate": 2.5133689839572196e-05,
"loss": 1.1836,
"step": 160
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.1880892962217331,
"learning_rate": 2.4598930481283424e-05,
"loss": 1.1794,
"step": 161
},
{
"epoch": 0.782608695652174,
"grad_norm": 0.17840184271335602,
"learning_rate": 2.4064171122994652e-05,
"loss": 1.1757,
"step": 162
},
{
"epoch": 0.7874396135265701,
"grad_norm": 0.18409621715545654,
"learning_rate": 2.3529411764705884e-05,
"loss": 1.1935,
"step": 163
},
{
"epoch": 0.7922705314009661,
"grad_norm": 0.18802137672901154,
"learning_rate": 2.2994652406417115e-05,
"loss": 1.1795,
"step": 164
},
{
"epoch": 0.7971014492753623,
"grad_norm": 0.1941538006067276,
"learning_rate": 2.2459893048128343e-05,
"loss": 1.2069,
"step": 165
},
{
"epoch": 0.8019323671497585,
"grad_norm": 0.18578243255615234,
"learning_rate": 2.192513368983957e-05,
"loss": 1.1965,
"step": 166
},
{
"epoch": 0.8067632850241546,
"grad_norm": 0.17622320353984833,
"learning_rate": 2.1390374331550803e-05,
"loss": 1.1839,
"step": 167
},
{
"epoch": 0.8115942028985508,
"grad_norm": 0.16080059111118317,
"learning_rate": 2.0855614973262035e-05,
"loss": 1.1926,
"step": 168
},
{
"epoch": 0.8164251207729468,
"grad_norm": 0.14835765957832336,
"learning_rate": 2.0320855614973263e-05,
"loss": 1.1958,
"step": 169
},
{
"epoch": 0.821256038647343,
"grad_norm": 0.14560697972774506,
"learning_rate": 1.9786096256684494e-05,
"loss": 1.2047,
"step": 170
}
],
"logging_steps": 1,
"max_steps": 207,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.493930894749139e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}