Qllama-.5B-Base-Wiki-Chat-RAG / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 29459,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0033945483553413217,
"grad_norm": 1.450656533241272,
"learning_rate": 4.983027258223293e-05,
"loss": 1.3353,
"step": 100
},
{
"epoch": 0.0067890967106826435,
"grad_norm": 1.7437301874160767,
"learning_rate": 4.966224243864354e-05,
"loss": 1.2966,
"step": 200
},
{
"epoch": 0.010183645066023966,
"grad_norm": 1.5103371143341064,
"learning_rate": 4.949251502087647e-05,
"loss": 1.2919,
"step": 300
},
{
"epoch": 0.013578193421365287,
"grad_norm": 1.2713204622268677,
"learning_rate": 4.932278760310941e-05,
"loss": 1.417,
"step": 400
},
{
"epoch": 0.01697274177670661,
"grad_norm": 1.9554837942123413,
"learning_rate": 4.915306018534234e-05,
"loss": 1.3327,
"step": 500
},
{
"epoch": 0.02036729013204793,
"grad_norm": 5.13068962097168,
"learning_rate": 4.898333276757528e-05,
"loss": 1.3058,
"step": 600
},
{
"epoch": 0.023761838487389254,
"grad_norm": 1.6552410125732422,
"learning_rate": 4.881360534980821e-05,
"loss": 1.3226,
"step": 700
},
{
"epoch": 0.027156386842730574,
"grad_norm": 1.807737946510315,
"learning_rate": 4.864387793204114e-05,
"loss": 1.334,
"step": 800
},
{
"epoch": 0.030550935198071897,
"grad_norm": 1.570865273475647,
"learning_rate": 4.847415051427408e-05,
"loss": 1.3456,
"step": 900
},
{
"epoch": 0.03394548355341322,
"grad_norm": 1.8917468786239624,
"learning_rate": 4.830442309650701e-05,
"loss": 1.3255,
"step": 1000
},
{
"epoch": 0.03734003190875454,
"grad_norm": 1.649667739868164,
"learning_rate": 4.813469567873995e-05,
"loss": 1.3013,
"step": 1100
},
{
"epoch": 0.04073458026409586,
"grad_norm": 2.6449484825134277,
"learning_rate": 4.796666553515055e-05,
"loss": 1.3369,
"step": 1200
},
{
"epoch": 0.044129128619437186,
"grad_norm": 1.874516487121582,
"learning_rate": 4.779693811738348e-05,
"loss": 1.27,
"step": 1300
},
{
"epoch": 0.04752367697477851,
"grad_norm": 1.7284377813339233,
"learning_rate": 4.762721069961642e-05,
"loss": 1.3931,
"step": 1400
},
{
"epoch": 0.050918225330119825,
"grad_norm": 3.4324212074279785,
"learning_rate": 4.745748328184935e-05,
"loss": 1.3457,
"step": 1500
},
{
"epoch": 0.05431277368546115,
"grad_norm": 1.3490582704544067,
"learning_rate": 4.7287755864082286e-05,
"loss": 1.2487,
"step": 1600
},
{
"epoch": 0.05770732204080247,
"grad_norm": 1.493403673171997,
"learning_rate": 4.711802844631522e-05,
"loss": 1.227,
"step": 1700
},
{
"epoch": 0.061101870396143794,
"grad_norm": 1.8711298704147339,
"learning_rate": 4.694830102854815e-05,
"loss": 1.2983,
"step": 1800
},
{
"epoch": 0.06449641875148511,
"grad_norm": 1.6430071592330933,
"learning_rate": 4.677857361078109e-05,
"loss": 1.3099,
"step": 1900
},
{
"epoch": 0.06789096710682643,
"grad_norm": 1.425639033317566,
"learning_rate": 4.660884619301402e-05,
"loss": 1.2607,
"step": 2000
},
{
"epoch": 0.07128551546216776,
"grad_norm": 1.7377012968063354,
"learning_rate": 4.6439118775246956e-05,
"loss": 1.2527,
"step": 2100
},
{
"epoch": 0.07468006381750908,
"grad_norm": 1.5430407524108887,
"learning_rate": 4.626939135747989e-05,
"loss": 1.2877,
"step": 2200
},
{
"epoch": 0.0780746121728504,
"grad_norm": 1.3293898105621338,
"learning_rate": 4.6101361213890495e-05,
"loss": 1.1781,
"step": 2300
},
{
"epoch": 0.08146916052819173,
"grad_norm": 2.114269733428955,
"learning_rate": 4.593163379612343e-05,
"loss": 1.3405,
"step": 2400
},
{
"epoch": 0.08486370888353305,
"grad_norm": 1.370626449584961,
"learning_rate": 4.576190637835636e-05,
"loss": 1.303,
"step": 2500
},
{
"epoch": 0.08825825723887437,
"grad_norm": 1.6202032566070557,
"learning_rate": 4.5592178960589295e-05,
"loss": 1.2427,
"step": 2600
},
{
"epoch": 0.0916528055942157,
"grad_norm": 1.6252601146697998,
"learning_rate": 4.542245154282223e-05,
"loss": 1.2768,
"step": 2700
},
{
"epoch": 0.09504735394955702,
"grad_norm": 1.4289278984069824,
"learning_rate": 4.5252724125055165e-05,
"loss": 1.2522,
"step": 2800
},
{
"epoch": 0.09844190230489833,
"grad_norm": 1.1954665184020996,
"learning_rate": 4.50829967072881e-05,
"loss": 1.3008,
"step": 2900
},
{
"epoch": 0.10183645066023965,
"grad_norm": 2.3695414066314697,
"learning_rate": 4.491326928952103e-05,
"loss": 1.3439,
"step": 3000
},
{
"epoch": 0.10523099901558097,
"grad_norm": 1.5015544891357422,
"learning_rate": 4.4743541871753965e-05,
"loss": 1.2042,
"step": 3100
},
{
"epoch": 0.1086255473709223,
"grad_norm": 1.3509881496429443,
"learning_rate": 4.45738144539869e-05,
"loss": 1.2834,
"step": 3200
},
{
"epoch": 0.11202009572626362,
"grad_norm": 1.1645573377609253,
"learning_rate": 4.4404087036219835e-05,
"loss": 1.3342,
"step": 3300
},
{
"epoch": 0.11541464408160494,
"grad_norm": 1.3376731872558594,
"learning_rate": 4.4234359618452764e-05,
"loss": 1.2501,
"step": 3400
},
{
"epoch": 0.11880919243694626,
"grad_norm": 1.184652328491211,
"learning_rate": 4.40646322006857e-05,
"loss": 1.2067,
"step": 3500
},
{
"epoch": 0.12220374079228759,
"grad_norm": 1.5623388290405273,
"learning_rate": 4.3894904782918635e-05,
"loss": 1.2109,
"step": 3600
},
{
"epoch": 0.1255982891476289,
"grad_norm": 1.537017583847046,
"learning_rate": 4.372517736515157e-05,
"loss": 1.2207,
"step": 3700
},
{
"epoch": 0.12899283750297022,
"grad_norm": 1.5127204656600952,
"learning_rate": 4.3555449947384505e-05,
"loss": 1.3273,
"step": 3800
},
{
"epoch": 0.13238738585831156,
"grad_norm": 1.504813313484192,
"learning_rate": 4.3385722529617434e-05,
"loss": 1.2347,
"step": 3900
},
{
"epoch": 0.13578193421365287,
"grad_norm": 1.5462582111358643,
"learning_rate": 4.321599511185037e-05,
"loss": 1.3142,
"step": 4000
},
{
"epoch": 0.1391764825689942,
"grad_norm": 1.377742886543274,
"learning_rate": 4.3046267694083305e-05,
"loss": 1.2685,
"step": 4100
},
{
"epoch": 0.1425710309243355,
"grad_norm": 1.5139976739883423,
"learning_rate": 4.287654027631624e-05,
"loss": 1.2718,
"step": 4200
},
{
"epoch": 0.14596557927967685,
"grad_norm": 1.2067663669586182,
"learning_rate": 4.2706812858549175e-05,
"loss": 1.2405,
"step": 4300
},
{
"epoch": 0.14936012763501816,
"grad_norm": 1.2739530801773071,
"learning_rate": 4.2537085440782104e-05,
"loss": 1.2186,
"step": 4400
},
{
"epoch": 0.1527546759903595,
"grad_norm": 1.4101356267929077,
"learning_rate": 4.236735802301504e-05,
"loss": 1.222,
"step": 4500
},
{
"epoch": 0.1561492243457008,
"grad_norm": 1.8474892377853394,
"learning_rate": 4.2197630605247975e-05,
"loss": 1.2685,
"step": 4600
},
{
"epoch": 0.1595437727010421,
"grad_norm": 1.5274946689605713,
"learning_rate": 4.202790318748091e-05,
"loss": 1.2161,
"step": 4700
},
{
"epoch": 0.16293832105638345,
"grad_norm": 1.8831485509872437,
"learning_rate": 4.1859873043891514e-05,
"loss": 1.2828,
"step": 4800
},
{
"epoch": 0.16633286941172476,
"grad_norm": 2.1567959785461426,
"learning_rate": 4.169014562612444e-05,
"loss": 1.3027,
"step": 4900
},
{
"epoch": 0.1697274177670661,
"grad_norm": 1.4506981372833252,
"learning_rate": 4.152041820835738e-05,
"loss": 1.2094,
"step": 5000
},
{
"epoch": 0.1731219661224074,
"grad_norm": 1.2342296838760376,
"learning_rate": 4.1350690790590314e-05,
"loss": 1.2523,
"step": 5100
},
{
"epoch": 0.17651651447774874,
"grad_norm": 1.6375709772109985,
"learning_rate": 4.118096337282325e-05,
"loss": 1.2418,
"step": 5200
},
{
"epoch": 0.17991106283309005,
"grad_norm": 1.3406407833099365,
"learning_rate": 4.1011235955056184e-05,
"loss": 1.2777,
"step": 5300
},
{
"epoch": 0.1833056111884314,
"grad_norm": 1.2170027494430542,
"learning_rate": 4.084150853728911e-05,
"loss": 1.1458,
"step": 5400
},
{
"epoch": 0.1867001595437727,
"grad_norm": 1.4051603078842163,
"learning_rate": 4.067178111952205e-05,
"loss": 1.2022,
"step": 5500
},
{
"epoch": 0.19009470789911403,
"grad_norm": 1.3835875988006592,
"learning_rate": 4.0502053701754984e-05,
"loss": 1.2601,
"step": 5600
},
{
"epoch": 0.19348925625445534,
"grad_norm": 1.7600008249282837,
"learning_rate": 4.033232628398791e-05,
"loss": 1.2172,
"step": 5700
},
{
"epoch": 0.19688380460979665,
"grad_norm": 1.4803307056427002,
"learning_rate": 4.0162598866220854e-05,
"loss": 1.3049,
"step": 5800
},
{
"epoch": 0.200278352965138,
"grad_norm": 1.2911592721939087,
"learning_rate": 3.999287144845378e-05,
"loss": 1.2018,
"step": 5900
},
{
"epoch": 0.2036729013204793,
"grad_norm": 1.5094434022903442,
"learning_rate": 3.982314403068672e-05,
"loss": 1.1798,
"step": 6000
},
{
"epoch": 0.20706744967582064,
"grad_norm": 1.44119131565094,
"learning_rate": 3.9653416612919654e-05,
"loss": 1.2012,
"step": 6100
},
{
"epoch": 0.21046199803116195,
"grad_norm": 1.3899762630462646,
"learning_rate": 3.948368919515258e-05,
"loss": 1.1851,
"step": 6200
},
{
"epoch": 0.21385654638650328,
"grad_norm": 2.047968864440918,
"learning_rate": 3.9313961777385524e-05,
"loss": 1.2239,
"step": 6300
},
{
"epoch": 0.2172510947418446,
"grad_norm": 1.827493667602539,
"learning_rate": 3.914423435961845e-05,
"loss": 1.2358,
"step": 6400
},
{
"epoch": 0.22064564309718593,
"grad_norm": 1.4631316661834717,
"learning_rate": 3.8976204216029064e-05,
"loss": 1.2058,
"step": 6500
},
{
"epoch": 0.22404019145252724,
"grad_norm": 1.5218262672424316,
"learning_rate": 3.880647679826199e-05,
"loss": 1.1717,
"step": 6600
},
{
"epoch": 0.22743473980786857,
"grad_norm": 1.3896803855895996,
"learning_rate": 3.863674938049492e-05,
"loss": 1.1912,
"step": 6700
},
{
"epoch": 0.23082928816320988,
"grad_norm": 1.587547779083252,
"learning_rate": 3.846702196272786e-05,
"loss": 1.2109,
"step": 6800
},
{
"epoch": 0.23422383651855122,
"grad_norm": 1.35820472240448,
"learning_rate": 3.829729454496079e-05,
"loss": 1.1828,
"step": 6900
},
{
"epoch": 0.23761838487389253,
"grad_norm": 1.2581636905670166,
"learning_rate": 3.8127567127193734e-05,
"loss": 1.1701,
"step": 7000
},
{
"epoch": 0.24101293322923384,
"grad_norm": 1.617680549621582,
"learning_rate": 3.795783970942666e-05,
"loss": 1.2159,
"step": 7100
},
{
"epoch": 0.24440748158457518,
"grad_norm": 1.3621796369552612,
"learning_rate": 3.778811229165959e-05,
"loss": 1.1951,
"step": 7200
},
{
"epoch": 0.24780202993991648,
"grad_norm": 1.8783783912658691,
"learning_rate": 3.761838487389253e-05,
"loss": 1.2664,
"step": 7300
},
{
"epoch": 0.2511965782952578,
"grad_norm": 1.1315891742706299,
"learning_rate": 3.744865745612546e-05,
"loss": 1.206,
"step": 7400
},
{
"epoch": 0.25459112665059913,
"grad_norm": 1.3531254529953003,
"learning_rate": 3.7278930038358404e-05,
"loss": 1.152,
"step": 7500
},
{
"epoch": 0.25798567500594044,
"grad_norm": 1.7136415243148804,
"learning_rate": 3.710920262059133e-05,
"loss": 1.3282,
"step": 7600
},
{
"epoch": 0.2613802233612818,
"grad_norm": 1.5798516273498535,
"learning_rate": 3.693947520282426e-05,
"loss": 1.2192,
"step": 7700
},
{
"epoch": 0.2647747717166231,
"grad_norm": 2.0638535022735596,
"learning_rate": 3.67697477850572e-05,
"loss": 1.2233,
"step": 7800
},
{
"epoch": 0.2681693200719644,
"grad_norm": 1.6473902463912964,
"learning_rate": 3.660002036729013e-05,
"loss": 1.2384,
"step": 7900
},
{
"epoch": 0.27156386842730573,
"grad_norm": 1.4174180030822754,
"learning_rate": 3.643029294952307e-05,
"loss": 1.3018,
"step": 8000
},
{
"epoch": 0.27495841678264704,
"grad_norm": 1.470323920249939,
"learning_rate": 3.6260565531756e-05,
"loss": 1.1662,
"step": 8100
},
{
"epoch": 0.2783529651379884,
"grad_norm": 1.1814874410629272,
"learning_rate": 3.609083811398893e-05,
"loss": 1.1641,
"step": 8200
},
{
"epoch": 0.2817475134933297,
"grad_norm": 1.496795892715454,
"learning_rate": 3.592111069622187e-05,
"loss": 1.208,
"step": 8300
},
{
"epoch": 0.285142061848671,
"grad_norm": 1.602959394454956,
"learning_rate": 3.57513832784548e-05,
"loss": 1.2323,
"step": 8400
},
{
"epoch": 0.28853661020401233,
"grad_norm": 1.4096314907073975,
"learning_rate": 3.558165586068774e-05,
"loss": 1.1804,
"step": 8500
},
{
"epoch": 0.2919311585593537,
"grad_norm": 1.2292312383651733,
"learning_rate": 3.541192844292067e-05,
"loss": 1.1747,
"step": 8600
},
{
"epoch": 0.295325706914695,
"grad_norm": 1.3961174488067627,
"learning_rate": 3.52422010251536e-05,
"loss": 1.1459,
"step": 8700
},
{
"epoch": 0.2987202552700363,
"grad_norm": 1.2199640274047852,
"learning_rate": 3.507247360738654e-05,
"loss": 1.1992,
"step": 8800
},
{
"epoch": 0.3021148036253776,
"grad_norm": 1.316805362701416,
"learning_rate": 3.490274618961947e-05,
"loss": 1.2202,
"step": 8900
},
{
"epoch": 0.305509351980719,
"grad_norm": 1.3120840787887573,
"learning_rate": 3.473301877185241e-05,
"loss": 1.1095,
"step": 9000
},
{
"epoch": 0.3089039003360603,
"grad_norm": 1.14743971824646,
"learning_rate": 3.456329135408534e-05,
"loss": 1.2154,
"step": 9100
},
{
"epoch": 0.3122984486914016,
"grad_norm": 1.6754459142684937,
"learning_rate": 3.439356393631827e-05,
"loss": 1.1927,
"step": 9200
},
{
"epoch": 0.3156929970467429,
"grad_norm": 1.2429569959640503,
"learning_rate": 3.422383651855121e-05,
"loss": 1.2623,
"step": 9300
},
{
"epoch": 0.3190875454020842,
"grad_norm": 1.5485316514968872,
"learning_rate": 3.405410910078414e-05,
"loss": 1.176,
"step": 9400
},
{
"epoch": 0.3224820937574256,
"grad_norm": 1.3292936086654663,
"learning_rate": 3.388438168301708e-05,
"loss": 1.1637,
"step": 9500
},
{
"epoch": 0.3258766421127669,
"grad_norm": 1.4114725589752197,
"learning_rate": 3.371465426525001e-05,
"loss": 1.151,
"step": 9600
},
{
"epoch": 0.3292711904681082,
"grad_norm": 1.6183195114135742,
"learning_rate": 3.354492684748294e-05,
"loss": 1.1929,
"step": 9700
},
{
"epoch": 0.3326657388234495,
"grad_norm": 1.7640340328216553,
"learning_rate": 3.3375199429715876e-05,
"loss": 1.1472,
"step": 9800
},
{
"epoch": 0.3360602871787909,
"grad_norm": 1.300631046295166,
"learning_rate": 3.320547201194881e-05,
"loss": 1.2158,
"step": 9900
},
{
"epoch": 0.3394548355341322,
"grad_norm": 1.5510449409484863,
"learning_rate": 3.303574459418175e-05,
"loss": 1.231,
"step": 10000
},
{
"epoch": 0.3428493838894735,
"grad_norm": 1.7029348611831665,
"learning_rate": 3.286601717641468e-05,
"loss": 1.152,
"step": 10100
},
{
"epoch": 0.3462439322448148,
"grad_norm": 2.094801902770996,
"learning_rate": 3.269628975864761e-05,
"loss": 1.1792,
"step": 10200
},
{
"epoch": 0.3496384806001562,
"grad_norm": 1.2476887702941895,
"learning_rate": 3.2526562340880546e-05,
"loss": 1.1297,
"step": 10300
},
{
"epoch": 0.3530330289554975,
"grad_norm": 1.2222412824630737,
"learning_rate": 3.235683492311348e-05,
"loss": 1.2194,
"step": 10400
},
{
"epoch": 0.3564275773108388,
"grad_norm": 1.2689149379730225,
"learning_rate": 3.218710750534642e-05,
"loss": 1.109,
"step": 10500
},
{
"epoch": 0.3598221256661801,
"grad_norm": 1.1400436162948608,
"learning_rate": 3.201738008757935e-05,
"loss": 1.1778,
"step": 10600
},
{
"epoch": 0.3632166740215214,
"grad_norm": 1.5304007530212402,
"learning_rate": 3.184765266981228e-05,
"loss": 1.2015,
"step": 10700
},
{
"epoch": 0.3666112223768628,
"grad_norm": 1.4382191896438599,
"learning_rate": 3.1677925252045216e-05,
"loss": 1.2023,
"step": 10800
},
{
"epoch": 0.3700057707322041,
"grad_norm": 1.2539787292480469,
"learning_rate": 3.150819783427815e-05,
"loss": 1.1627,
"step": 10900
},
{
"epoch": 0.3734003190875454,
"grad_norm": 1.6526975631713867,
"learning_rate": 3.133847041651109e-05,
"loss": 1.1909,
"step": 11000
},
{
"epoch": 0.3767948674428867,
"grad_norm": 1.477150559425354,
"learning_rate": 3.1168742998744016e-05,
"loss": 1.1767,
"step": 11100
},
{
"epoch": 0.38018941579822807,
"grad_norm": 1.655372142791748,
"learning_rate": 3.100071285515462e-05,
"loss": 1.1715,
"step": 11200
},
{
"epoch": 0.3835839641535694,
"grad_norm": 1.237518310546875,
"learning_rate": 3.0830985437387555e-05,
"loss": 1.1148,
"step": 11300
},
{
"epoch": 0.3869785125089107,
"grad_norm": 2.0262339115142822,
"learning_rate": 3.066125801962049e-05,
"loss": 1.056,
"step": 11400
},
{
"epoch": 0.390373060864252,
"grad_norm": 1.4669376611709595,
"learning_rate": 3.0491530601853423e-05,
"loss": 1.1773,
"step": 11500
},
{
"epoch": 0.3937676092195933,
"grad_norm": 1.6047866344451904,
"learning_rate": 3.032180318408636e-05,
"loss": 1.1846,
"step": 11600
},
{
"epoch": 0.39716215757493467,
"grad_norm": 1.5415077209472656,
"learning_rate": 3.0152075766319293e-05,
"loss": 1.1481,
"step": 11700
},
{
"epoch": 0.400556705930276,
"grad_norm": 1.2356903553009033,
"learning_rate": 2.9982348348552225e-05,
"loss": 1.1914,
"step": 11800
},
{
"epoch": 0.4039512542856173,
"grad_norm": 1.691815733909607,
"learning_rate": 2.9814318204962833e-05,
"loss": 1.2595,
"step": 11900
},
{
"epoch": 0.4073458026409586,
"grad_norm": 1.3964107036590576,
"learning_rate": 2.9644590787195765e-05,
"loss": 1.137,
"step": 12000
},
{
"epoch": 0.41074035099629996,
"grad_norm": 1.4641882181167603,
"learning_rate": 2.94748633694287e-05,
"loss": 1.2194,
"step": 12100
},
{
"epoch": 0.4141348993516413,
"grad_norm": 1.2686254978179932,
"learning_rate": 2.9305135951661632e-05,
"loss": 1.1666,
"step": 12200
},
{
"epoch": 0.4175294477069826,
"grad_norm": 1.5064525604248047,
"learning_rate": 2.9135408533894564e-05,
"loss": 1.2265,
"step": 12300
},
{
"epoch": 0.4209239960623239,
"grad_norm": 1.3071587085723877,
"learning_rate": 2.8965681116127503e-05,
"loss": 1.0625,
"step": 12400
},
{
"epoch": 0.42431854441766526,
"grad_norm": 1.4859912395477295,
"learning_rate": 2.8795953698360435e-05,
"loss": 1.1239,
"step": 12500
},
{
"epoch": 0.42771309277300656,
"grad_norm": 1.4131548404693604,
"learning_rate": 2.862622628059337e-05,
"loss": 1.2125,
"step": 12600
},
{
"epoch": 0.4311076411283479,
"grad_norm": 1.1708953380584717,
"learning_rate": 2.8456498862826302e-05,
"loss": 1.145,
"step": 12700
},
{
"epoch": 0.4345021894836892,
"grad_norm": 1.4931575059890747,
"learning_rate": 2.8286771445059234e-05,
"loss": 1.102,
"step": 12800
},
{
"epoch": 0.4378967378390305,
"grad_norm": 1.6308887004852295,
"learning_rate": 2.8117044027292173e-05,
"loss": 1.1574,
"step": 12900
},
{
"epoch": 0.44129128619437186,
"grad_norm": 1.532914638519287,
"learning_rate": 2.7947316609525105e-05,
"loss": 1.1901,
"step": 13000
},
{
"epoch": 0.44468583454971317,
"grad_norm": 1.5746792554855347,
"learning_rate": 2.7777589191758037e-05,
"loss": 1.2077,
"step": 13100
},
{
"epoch": 0.4480803829050545,
"grad_norm": 1.7640366554260254,
"learning_rate": 2.7607861773990972e-05,
"loss": 1.2147,
"step": 13200
},
{
"epoch": 0.4514749312603958,
"grad_norm": 1.4942810535430908,
"learning_rate": 2.7438134356223904e-05,
"loss": 1.2471,
"step": 13300
},
{
"epoch": 0.45486947961573715,
"grad_norm": 1.449723243713379,
"learning_rate": 2.7268406938456843e-05,
"loss": 1.1991,
"step": 13400
},
{
"epoch": 0.45826402797107846,
"grad_norm": 1.0219964981079102,
"learning_rate": 2.7098679520689775e-05,
"loss": 1.0989,
"step": 13500
},
{
"epoch": 0.46165857632641977,
"grad_norm": 1.4733655452728271,
"learning_rate": 2.6928952102922707e-05,
"loss": 1.1652,
"step": 13600
},
{
"epoch": 0.4650531246817611,
"grad_norm": 1.4748992919921875,
"learning_rate": 2.6759224685155642e-05,
"loss": 1.1522,
"step": 13700
},
{
"epoch": 0.46844767303710244,
"grad_norm": 1.918239712715149,
"learning_rate": 2.6589497267388574e-05,
"loss": 1.0624,
"step": 13800
},
{
"epoch": 0.47184222139244375,
"grad_norm": 1.4620022773742676,
"learning_rate": 2.6419769849621513e-05,
"loss": 1.2269,
"step": 13900
},
{
"epoch": 0.47523676974778506,
"grad_norm": 1.647291898727417,
"learning_rate": 2.6250042431854445e-05,
"loss": 1.0928,
"step": 14000
},
{
"epoch": 0.47863131810312637,
"grad_norm": 1.4002645015716553,
"learning_rate": 2.6080315014087377e-05,
"loss": 1.1475,
"step": 14100
},
{
"epoch": 0.4820258664584677,
"grad_norm": 1.329160451889038,
"learning_rate": 2.5910587596320312e-05,
"loss": 1.1787,
"step": 14200
},
{
"epoch": 0.48542041481380904,
"grad_norm": 1.0468798875808716,
"learning_rate": 2.5740860178553244e-05,
"loss": 1.1257,
"step": 14300
},
{
"epoch": 0.48881496316915035,
"grad_norm": 1.1814810037612915,
"learning_rate": 2.5571132760786176e-05,
"loss": 1.2252,
"step": 14400
},
{
"epoch": 0.49220951152449166,
"grad_norm": 1.442358136177063,
"learning_rate": 2.5401405343019115e-05,
"loss": 1.1474,
"step": 14500
},
{
"epoch": 0.49560405987983297,
"grad_norm": 1.2082366943359375,
"learning_rate": 2.5231677925252044e-05,
"loss": 1.1271,
"step": 14600
},
{
"epoch": 0.49899860823517433,
"grad_norm": 1.3044782876968384,
"learning_rate": 2.5061950507484982e-05,
"loss": 1.1204,
"step": 14700
},
{
"epoch": 0.5023931565905156,
"grad_norm": 1.257338047027588,
"learning_rate": 2.4893920363895583e-05,
"loss": 1.1891,
"step": 14800
},
{
"epoch": 0.505787704945857,
"grad_norm": 1.6963568925857544,
"learning_rate": 2.472419294612852e-05,
"loss": 1.0711,
"step": 14900
},
{
"epoch": 0.5091822533011983,
"grad_norm": 1.4593158960342407,
"learning_rate": 2.4554465528361454e-05,
"loss": 1.1764,
"step": 15000
},
{
"epoch": 0.5125768016565396,
"grad_norm": 1.2803332805633545,
"learning_rate": 2.438473811059439e-05,
"loss": 1.1213,
"step": 15100
},
{
"epoch": 0.5159713500118809,
"grad_norm": 1.0880329608917236,
"learning_rate": 2.421501069282732e-05,
"loss": 1.0686,
"step": 15200
},
{
"epoch": 0.5193658983672222,
"grad_norm": 1.350434422492981,
"learning_rate": 2.4045283275060253e-05,
"loss": 1.1244,
"step": 15300
},
{
"epoch": 0.5227604467225636,
"grad_norm": 1.4851505756378174,
"learning_rate": 2.387555585729319e-05,
"loss": 1.1519,
"step": 15400
},
{
"epoch": 0.5261549950779049,
"grad_norm": 1.4524593353271484,
"learning_rate": 2.3705828439526124e-05,
"loss": 1.1139,
"step": 15500
},
{
"epoch": 0.5295495434332462,
"grad_norm": 1.3715015649795532,
"learning_rate": 2.3536101021759056e-05,
"loss": 1.1176,
"step": 15600
},
{
"epoch": 0.5329440917885875,
"grad_norm": 1.3227180242538452,
"learning_rate": 2.3366373603991988e-05,
"loss": 1.1547,
"step": 15700
},
{
"epoch": 0.5363386401439288,
"grad_norm": 1.742480754852295,
"learning_rate": 2.3196646186224923e-05,
"loss": 1.2338,
"step": 15800
},
{
"epoch": 0.5397331884992702,
"grad_norm": 1.3990530967712402,
"learning_rate": 2.302691876845786e-05,
"loss": 1.1808,
"step": 15900
},
{
"epoch": 0.5431277368546115,
"grad_norm": 1.6087653636932373,
"learning_rate": 2.285719135069079e-05,
"loss": 1.2029,
"step": 16000
},
{
"epoch": 0.5465222852099528,
"grad_norm": 1.3504618406295776,
"learning_rate": 2.2687463932923726e-05,
"loss": 1.138,
"step": 16100
},
{
"epoch": 0.5499168335652941,
"grad_norm": 1.226248025894165,
"learning_rate": 2.2517736515156658e-05,
"loss": 1.1006,
"step": 16200
},
{
"epoch": 0.5533113819206354,
"grad_norm": 1.0794544219970703,
"learning_rate": 2.2348009097389593e-05,
"loss": 1.111,
"step": 16300
},
{
"epoch": 0.5567059302759768,
"grad_norm": 1.3800761699676514,
"learning_rate": 2.217828167962253e-05,
"loss": 1.1554,
"step": 16400
},
{
"epoch": 0.5601004786313181,
"grad_norm": 1.1783385276794434,
"learning_rate": 2.200855426185546e-05,
"loss": 1.157,
"step": 16500
},
{
"epoch": 0.5634950269866594,
"grad_norm": 1.483588457107544,
"learning_rate": 2.1838826844088396e-05,
"loss": 1.1443,
"step": 16600
},
{
"epoch": 0.5668895753420008,
"grad_norm": 1.847670555114746,
"learning_rate": 2.1669099426321328e-05,
"loss": 1.1667,
"step": 16700
},
{
"epoch": 0.570284123697342,
"grad_norm": 1.524003028869629,
"learning_rate": 2.1499372008554263e-05,
"loss": 1.1555,
"step": 16800
},
{
"epoch": 0.5736786720526834,
"grad_norm": 1.6308820247650146,
"learning_rate": 2.1329644590787195e-05,
"loss": 1.0674,
"step": 16900
},
{
"epoch": 0.5770732204080247,
"grad_norm": 1.4396891593933105,
"learning_rate": 2.115991717302013e-05,
"loss": 1.1481,
"step": 17000
},
{
"epoch": 0.580467768763366,
"grad_norm": 1.6904021501541138,
"learning_rate": 2.0990189755253066e-05,
"loss": 1.044,
"step": 17100
},
{
"epoch": 0.5838623171187074,
"grad_norm": 1.8386590480804443,
"learning_rate": 2.0820462337485998e-05,
"loss": 1.0662,
"step": 17200
},
{
"epoch": 0.5872568654740487,
"grad_norm": 1.3602131605148315,
"learning_rate": 2.0650734919718933e-05,
"loss": 1.072,
"step": 17300
},
{
"epoch": 0.59065141382939,
"grad_norm": 1.2853094339370728,
"learning_rate": 2.0481007501951865e-05,
"loss": 1.1799,
"step": 17400
},
{
"epoch": 0.5940459621847313,
"grad_norm": 1.418142557144165,
"learning_rate": 2.03112800841848e-05,
"loss": 1.1163,
"step": 17500
},
{
"epoch": 0.5974405105400726,
"grad_norm": 1.3810557126998901,
"learning_rate": 2.0141552666417736e-05,
"loss": 1.1246,
"step": 17600
},
{
"epoch": 0.600835058895414,
"grad_norm": 1.3166576623916626,
"learning_rate": 1.9971825248650668e-05,
"loss": 1.0635,
"step": 17700
},
{
"epoch": 0.6042296072507553,
"grad_norm": 1.2918510437011719,
"learning_rate": 1.98020978308836e-05,
"loss": 1.1338,
"step": 17800
},
{
"epoch": 0.6076241556060966,
"grad_norm": 1.3206653594970703,
"learning_rate": 1.9632370413116535e-05,
"loss": 1.1538,
"step": 17900
},
{
"epoch": 0.611018703961438,
"grad_norm": 1.1084457635879517,
"learning_rate": 1.946264299534947e-05,
"loss": 1.0151,
"step": 18000
},
{
"epoch": 0.6144132523167792,
"grad_norm": 1.6946609020233154,
"learning_rate": 1.9292915577582406e-05,
"loss": 1.173,
"step": 18100
},
{
"epoch": 0.6178078006721206,
"grad_norm": 1.5061676502227783,
"learning_rate": 1.9123188159815334e-05,
"loss": 1.1463,
"step": 18200
},
{
"epoch": 0.6212023490274619,
"grad_norm": 1.400976300239563,
"learning_rate": 1.895346074204827e-05,
"loss": 1.147,
"step": 18300
},
{
"epoch": 0.6245968973828032,
"grad_norm": 1.398390769958496,
"learning_rate": 1.8783733324281205e-05,
"loss": 1.0824,
"step": 18400
},
{
"epoch": 0.6279914457381446,
"grad_norm": 1.0492353439331055,
"learning_rate": 1.861570318069181e-05,
"loss": 1.1912,
"step": 18500
},
{
"epoch": 0.6313859940934858,
"grad_norm": 1.5323091745376587,
"learning_rate": 1.8445975762924745e-05,
"loss": 1.1324,
"step": 18600
},
{
"epoch": 0.6347805424488272,
"grad_norm": 1.2045379877090454,
"learning_rate": 1.827624834515768e-05,
"loss": 1.1011,
"step": 18700
},
{
"epoch": 0.6381750908041685,
"grad_norm": 1.4627662897109985,
"learning_rate": 1.8106520927390612e-05,
"loss": 1.0789,
"step": 18800
},
{
"epoch": 0.6415696391595098,
"grad_norm": 1.492099642753601,
"learning_rate": 1.7936793509623544e-05,
"loss": 1.1794,
"step": 18900
},
{
"epoch": 0.6449641875148512,
"grad_norm": 1.8160879611968994,
"learning_rate": 1.776706609185648e-05,
"loss": 1.087,
"step": 19000
},
{
"epoch": 0.6483587358701924,
"grad_norm": 1.135730504989624,
"learning_rate": 1.7597338674089415e-05,
"loss": 1.1466,
"step": 19100
},
{
"epoch": 0.6517532842255338,
"grad_norm": 1.2633298635482788,
"learning_rate": 1.7427611256322347e-05,
"loss": 1.1453,
"step": 19200
},
{
"epoch": 0.6551478325808752,
"grad_norm": 1.3639088869094849,
"learning_rate": 1.725788383855528e-05,
"loss": 1.1772,
"step": 19300
},
{
"epoch": 0.6585423809362164,
"grad_norm": 1.5371415615081787,
"learning_rate": 1.7088156420788214e-05,
"loss": 1.1676,
"step": 19400
},
{
"epoch": 0.6619369292915578,
"grad_norm": 1.1205295324325562,
"learning_rate": 1.691842900302115e-05,
"loss": 1.1312,
"step": 19500
},
{
"epoch": 0.665331477646899,
"grad_norm": 1.3705852031707764,
"learning_rate": 1.6748701585254085e-05,
"loss": 1.2032,
"step": 19600
},
{
"epoch": 0.6687260260022404,
"grad_norm": 1.6704633235931396,
"learning_rate": 1.6578974167487017e-05,
"loss": 1.1594,
"step": 19700
},
{
"epoch": 0.6721205743575818,
"grad_norm": 1.3317358493804932,
"learning_rate": 1.640924674971995e-05,
"loss": 1.1118,
"step": 19800
},
{
"epoch": 0.675515122712923,
"grad_norm": 1.666467547416687,
"learning_rate": 1.6239519331952884e-05,
"loss": 1.1402,
"step": 19900
},
{
"epoch": 0.6789096710682644,
"grad_norm": 1.5140140056610107,
"learning_rate": 1.606979191418582e-05,
"loss": 1.0712,
"step": 20000
},
{
"epoch": 0.6823042194236056,
"grad_norm": 1.5290478467941284,
"learning_rate": 1.590006449641875e-05,
"loss": 1.1054,
"step": 20100
},
{
"epoch": 0.685698767778947,
"grad_norm": 1.408411979675293,
"learning_rate": 1.5730337078651687e-05,
"loss": 1.1755,
"step": 20200
},
{
"epoch": 0.6890933161342884,
"grad_norm": 1.8979178667068481,
"learning_rate": 1.556060966088462e-05,
"loss": 1.0911,
"step": 20300
},
{
"epoch": 0.6924878644896296,
"grad_norm": 1.3804025650024414,
"learning_rate": 1.5390882243117554e-05,
"loss": 1.1299,
"step": 20400
},
{
"epoch": 0.695882412844971,
"grad_norm": 1.1603401899337769,
"learning_rate": 1.5221154825350486e-05,
"loss": 1.081,
"step": 20500
},
{
"epoch": 0.6992769612003124,
"grad_norm": 1.4648966789245605,
"learning_rate": 1.5051427407583421e-05,
"loss": 1.1228,
"step": 20600
},
{
"epoch": 0.7026715095556536,
"grad_norm": 1.589272379875183,
"learning_rate": 1.4881699989816355e-05,
"loss": 1.1068,
"step": 20700
},
{
"epoch": 0.706066057910995,
"grad_norm": 1.337220311164856,
"learning_rate": 1.471197257204929e-05,
"loss": 1.1538,
"step": 20800
},
{
"epoch": 0.7094606062663362,
"grad_norm": 1.5323350429534912,
"learning_rate": 1.4542245154282224e-05,
"loss": 1.0592,
"step": 20900
},
{
"epoch": 0.7128551546216776,
"grad_norm": 1.6231937408447266,
"learning_rate": 1.4372517736515156e-05,
"loss": 1.1526,
"step": 21000
},
{
"epoch": 0.716249702977019,
"grad_norm": 1.8754550218582153,
"learning_rate": 1.4202790318748091e-05,
"loss": 1.0773,
"step": 21100
},
{
"epoch": 0.7196442513323602,
"grad_norm": 1.1128793954849243,
"learning_rate": 1.4033062900981025e-05,
"loss": 1.0372,
"step": 21200
},
{
"epoch": 0.7230387996877016,
"grad_norm": 1.5695431232452393,
"learning_rate": 1.386333548321396e-05,
"loss": 1.1535,
"step": 21300
},
{
"epoch": 0.7264333480430428,
"grad_norm": 1.327945351600647,
"learning_rate": 1.3693608065446892e-05,
"loss": 1.1107,
"step": 21400
},
{
"epoch": 0.7298278963983842,
"grad_norm": 1.3291347026824951,
"learning_rate": 1.3523880647679826e-05,
"loss": 1.1303,
"step": 21500
},
{
"epoch": 0.7332224447537256,
"grad_norm": 1.3102412223815918,
"learning_rate": 1.3354153229912761e-05,
"loss": 1.1271,
"step": 21600
},
{
"epoch": 0.7366169931090668,
"grad_norm": 1.532332181930542,
"learning_rate": 1.3184425812145695e-05,
"loss": 1.1259,
"step": 21700
},
{
"epoch": 0.7400115414644082,
"grad_norm": 1.8076393604278564,
"learning_rate": 1.301469839437863e-05,
"loss": 1.0277,
"step": 21800
},
{
"epoch": 0.7434060898197494,
"grad_norm": 1.560998558998108,
"learning_rate": 1.2844970976611562e-05,
"loss": 1.0944,
"step": 21900
},
{
"epoch": 0.7468006381750908,
"grad_norm": 1.4512039422988892,
"learning_rate": 1.2675243558844496e-05,
"loss": 1.1439,
"step": 22000
},
{
"epoch": 0.7501951865304322,
"grad_norm": 1.1790564060211182,
"learning_rate": 1.2505516141077431e-05,
"loss": 1.1109,
"step": 22100
},
{
"epoch": 0.7535897348857734,
"grad_norm": 0.8725073337554932,
"learning_rate": 1.2337485997488036e-05,
"loss": 1.1064,
"step": 22200
},
{
"epoch": 0.7569842832411148,
"grad_norm": 1.7705230712890625,
"learning_rate": 1.2167758579720968e-05,
"loss": 1.1094,
"step": 22300
},
{
"epoch": 0.7603788315964561,
"grad_norm": 1.72670578956604,
"learning_rate": 1.1998031161953903e-05,
"loss": 1.0784,
"step": 22400
},
{
"epoch": 0.7637733799517974,
"grad_norm": 1.0623925924301147,
"learning_rate": 1.1828303744186837e-05,
"loss": 1.1441,
"step": 22500
},
{
"epoch": 0.7671679283071388,
"grad_norm": 1.4572324752807617,
"learning_rate": 1.165857632641977e-05,
"loss": 1.0754,
"step": 22600
},
{
"epoch": 0.77056247666248,
"grad_norm": 1.4778876304626465,
"learning_rate": 1.1488848908652704e-05,
"loss": 1.0816,
"step": 22700
},
{
"epoch": 0.7739570250178214,
"grad_norm": 1.5544917583465576,
"learning_rate": 1.1319121490885638e-05,
"loss": 1.1449,
"step": 22800
},
{
"epoch": 0.7773515733731627,
"grad_norm": 1.4993566274642944,
"learning_rate": 1.1149394073118571e-05,
"loss": 1.0315,
"step": 22900
},
{
"epoch": 0.780746121728504,
"grad_norm": 1.5602749586105347,
"learning_rate": 1.0979666655351507e-05,
"loss": 1.151,
"step": 23000
},
{
"epoch": 0.7841406700838454,
"grad_norm": 1.2788993120193481,
"learning_rate": 1.080993923758444e-05,
"loss": 1.1134,
"step": 23100
},
{
"epoch": 0.7875352184391866,
"grad_norm": 1.4655214548110962,
"learning_rate": 1.0640211819817374e-05,
"loss": 1.1033,
"step": 23200
},
{
"epoch": 0.790929766794528,
"grad_norm": 1.4986985921859741,
"learning_rate": 1.0470484402050308e-05,
"loss": 1.1025,
"step": 23300
},
{
"epoch": 0.7943243151498693,
"grad_norm": 1.651713490486145,
"learning_rate": 1.0300756984283241e-05,
"loss": 1.173,
"step": 23400
},
{
"epoch": 0.7977188635052106,
"grad_norm": 1.4241468906402588,
"learning_rate": 1.0131029566516175e-05,
"loss": 1.1499,
"step": 23500
},
{
"epoch": 0.801113411860552,
"grad_norm": 1.4987541437149048,
"learning_rate": 9.961302148749109e-06,
"loss": 1.0355,
"step": 23600
},
{
"epoch": 0.8045079602158933,
"grad_norm": 1.6847175359725952,
"learning_rate": 9.791574730982044e-06,
"loss": 1.0974,
"step": 23700
},
{
"epoch": 0.8079025085712346,
"grad_norm": 1.319767713546753,
"learning_rate": 9.621847313214976e-06,
"loss": 1.0246,
"step": 23800
},
{
"epoch": 0.8112970569265759,
"grad_norm": 0.8837277293205261,
"learning_rate": 9.452119895447911e-06,
"loss": 1.0828,
"step": 23900
},
{
"epoch": 0.8146916052819172,
"grad_norm": 1.3911470174789429,
"learning_rate": 9.282392477680845e-06,
"loss": 1.1347,
"step": 24000
},
{
"epoch": 0.8180861536372586,
"grad_norm": 1.6268776655197144,
"learning_rate": 9.112665059913779e-06,
"loss": 1.1426,
"step": 24100
},
{
"epoch": 0.8214807019925999,
"grad_norm": 1.229019284248352,
"learning_rate": 8.942937642146714e-06,
"loss": 1.147,
"step": 24200
},
{
"epoch": 0.8248752503479412,
"grad_norm": 1.4097239971160889,
"learning_rate": 8.773210224379646e-06,
"loss": 1.1377,
"step": 24300
},
{
"epoch": 0.8282697987032825,
"grad_norm": 1.1406160593032837,
"learning_rate": 8.603482806612581e-06,
"loss": 1.1035,
"step": 24400
},
{
"epoch": 0.8316643470586238,
"grad_norm": 1.0381433963775635,
"learning_rate": 8.433755388845515e-06,
"loss": 1.091,
"step": 24500
},
{
"epoch": 0.8350588954139652,
"grad_norm": 1.3789398670196533,
"learning_rate": 8.264027971078449e-06,
"loss": 1.0108,
"step": 24600
},
{
"epoch": 0.8384534437693065,
"grad_norm": 1.2343610525131226,
"learning_rate": 8.094300553311382e-06,
"loss": 1.1196,
"step": 24700
},
{
"epoch": 0.8418479921246478,
"grad_norm": 1.3978173732757568,
"learning_rate": 7.924573135544316e-06,
"loss": 1.0261,
"step": 24800
},
{
"epoch": 0.8452425404799891,
"grad_norm": 1.2678471803665161,
"learning_rate": 7.75484571777725e-06,
"loss": 1.0813,
"step": 24900
},
{
"epoch": 0.8486370888353305,
"grad_norm": 1.5607575178146362,
"learning_rate": 7.585118300010184e-06,
"loss": 1.167,
"step": 25000
},
{
"epoch": 0.8520316371906718,
"grad_norm": 1.1577645540237427,
"learning_rate": 7.415390882243117e-06,
"loss": 1.1326,
"step": 25100
},
{
"epoch": 0.8554261855460131,
"grad_norm": 1.0699902772903442,
"learning_rate": 7.2456634644760515e-06,
"loss": 1.0751,
"step": 25200
},
{
"epoch": 0.8588207339013544,
"grad_norm": 1.3334201574325562,
"learning_rate": 7.075936046708986e-06,
"loss": 1.083,
"step": 25300
},
{
"epoch": 0.8622152822566957,
"grad_norm": 1.493215799331665,
"learning_rate": 6.90620862894192e-06,
"loss": 1.152,
"step": 25400
},
{
"epoch": 0.8656098306120371,
"grad_norm": 1.5618408918380737,
"learning_rate": 6.736481211174854e-06,
"loss": 1.0474,
"step": 25500
},
{
"epoch": 0.8690043789673784,
"grad_norm": 1.4898067712783813,
"learning_rate": 6.566753793407787e-06,
"loss": 1.1261,
"step": 25600
},
{
"epoch": 0.8723989273227197,
"grad_norm": 1.1436446905136108,
"learning_rate": 6.3970263756407215e-06,
"loss": 1.0708,
"step": 25700
},
{
"epoch": 0.875793475678061,
"grad_norm": 1.4544737339019775,
"learning_rate": 6.227298957873655e-06,
"loss": 1.0788,
"step": 25800
},
{
"epoch": 0.8791880240334023,
"grad_norm": 1.0729115009307861,
"learning_rate": 6.05757154010659e-06,
"loss": 1.0525,
"step": 25900
},
{
"epoch": 0.8825825723887437,
"grad_norm": 1.5516784191131592,
"learning_rate": 5.887844122339523e-06,
"loss": 1.0874,
"step": 26000
},
{
"epoch": 0.885977120744085,
"grad_norm": 1.4837692975997925,
"learning_rate": 5.718116704572457e-06,
"loss": 1.1142,
"step": 26100
},
{
"epoch": 0.8893716690994263,
"grad_norm": 1.1491631269454956,
"learning_rate": 5.548389286805391e-06,
"loss": 1.0618,
"step": 26200
},
{
"epoch": 0.8927662174547677,
"grad_norm": 1.5417340993881226,
"learning_rate": 5.378661869038324e-06,
"loss": 0.993,
"step": 26300
},
{
"epoch": 0.896160765810109,
"grad_norm": 0.9728216528892517,
"learning_rate": 5.208934451271259e-06,
"loss": 1.1583,
"step": 26400
},
{
"epoch": 0.8995553141654503,
"grad_norm": 1.0447022914886475,
"learning_rate": 5.0392070335041925e-06,
"loss": 1.0472,
"step": 26500
},
{
"epoch": 0.9029498625207916,
"grad_norm": 1.2869070768356323,
"learning_rate": 4.869479615737126e-06,
"loss": 1.0823,
"step": 26600
},
{
"epoch": 0.9063444108761329,
"grad_norm": 1.6531902551651,
"learning_rate": 4.69975219797006e-06,
"loss": 1.0836,
"step": 26700
},
{
"epoch": 0.9097389592314743,
"grad_norm": 1.559571385383606,
"learning_rate": 4.530024780202994e-06,
"loss": 1.0009,
"step": 26800
},
{
"epoch": 0.9131335075868156,
"grad_norm": 1.3163347244262695,
"learning_rate": 4.360297362435928e-06,
"loss": 1.1214,
"step": 26900
},
{
"epoch": 0.9165280559421569,
"grad_norm": 1.1032936573028564,
"learning_rate": 4.1905699446688625e-06,
"loss": 1.113,
"step": 27000
},
{
"epoch": 0.9199226042974982,
"grad_norm": 1.4257267713546753,
"learning_rate": 4.020842526901796e-06,
"loss": 1.0477,
"step": 27100
},
{
"epoch": 0.9233171526528395,
"grad_norm": 2.0018675327301025,
"learning_rate": 3.85111510913473e-06,
"loss": 1.1487,
"step": 27200
},
{
"epoch": 0.9267117010081809,
"grad_norm": 1.38235342502594,
"learning_rate": 3.681387691367664e-06,
"loss": 1.0816,
"step": 27300
},
{
"epoch": 0.9301062493635222,
"grad_norm": 1.4731274843215942,
"learning_rate": 3.5116602736005976e-06,
"loss": 1.0882,
"step": 27400
},
{
"epoch": 0.9335007977188635,
"grad_norm": 1.225797414779663,
"learning_rate": 3.3419328558335317e-06,
"loss": 1.0551,
"step": 27500
},
{
"epoch": 0.9368953460742049,
"grad_norm": 1.597345232963562,
"learning_rate": 3.1722054380664653e-06,
"loss": 1.0565,
"step": 27600
},
{
"epoch": 0.9402898944295461,
"grad_norm": 1.092685341835022,
"learning_rate": 3.0024780202993994e-06,
"loss": 1.0821,
"step": 27700
},
{
"epoch": 0.9436844427848875,
"grad_norm": 1.3143861293792725,
"learning_rate": 2.832750602532333e-06,
"loss": 1.0049,
"step": 27800
},
{
"epoch": 0.9470789911402288,
"grad_norm": 1.1611847877502441,
"learning_rate": 2.663023184765267e-06,
"loss": 1.031,
"step": 27900
},
{
"epoch": 0.9504735394955701,
"grad_norm": 1.3087385892868042,
"learning_rate": 2.4932957669982012e-06,
"loss": 0.9714,
"step": 28000
},
{
"epoch": 0.9538680878509115,
"grad_norm": 1.118117094039917,
"learning_rate": 2.323568349231135e-06,
"loss": 1.0057,
"step": 28100
},
{
"epoch": 0.9572626362062527,
"grad_norm": 1.5035566091537476,
"learning_rate": 2.1538409314640686e-06,
"loss": 1.1181,
"step": 28200
},
{
"epoch": 0.9606571845615941,
"grad_norm": 1.372116208076477,
"learning_rate": 1.9841135136970026e-06,
"loss": 1.042,
"step": 28300
},
{
"epoch": 0.9640517329169354,
"grad_norm": 1.189626693725586,
"learning_rate": 1.8160833701076074e-06,
"loss": 1.0346,
"step": 28400
},
{
"epoch": 0.9674462812722767,
"grad_norm": 1.6013319492340088,
"learning_rate": 1.6463559523405412e-06,
"loss": 1.06,
"step": 28500
},
{
"epoch": 0.9708408296276181,
"grad_norm": 1.7186366319656372,
"learning_rate": 1.4766285345734751e-06,
"loss": 1.1076,
"step": 28600
},
{
"epoch": 0.9742353779829593,
"grad_norm": 1.3533858060836792,
"learning_rate": 1.306901116806409e-06,
"loss": 1.1391,
"step": 28700
},
{
"epoch": 0.9776299263383007,
"grad_norm": 1.34947669506073,
"learning_rate": 1.1371736990393429e-06,
"loss": 1.0501,
"step": 28800
},
{
"epoch": 0.981024474693642,
"grad_norm": 1.5835421085357666,
"learning_rate": 9.674462812722767e-07,
"loss": 1.1168,
"step": 28900
},
{
"epoch": 0.9844190230489833,
"grad_norm": 1.2659107446670532,
"learning_rate": 7.977188635052106e-07,
"loss": 1.0592,
"step": 29000
},
{
"epoch": 0.9878135714043247,
"grad_norm": 1.212120532989502,
"learning_rate": 6.279914457381446e-07,
"loss": 1.0867,
"step": 29100
},
{
"epoch": 0.9912081197596659,
"grad_norm": 1.5085951089859009,
"learning_rate": 4.582640279710785e-07,
"loss": 1.0909,
"step": 29200
},
{
"epoch": 0.9946026681150073,
"grad_norm": 1.6492177248001099,
"learning_rate": 2.885366102040124e-07,
"loss": 1.0747,
"step": 29300
},
{
"epoch": 0.9979972164703487,
"grad_norm": 1.368004322052002,
"learning_rate": 1.1880919243694626e-07,
"loss": 0.9943,
"step": 29400
}
],
"logging_steps": 100,
"max_steps": 29459,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 29459,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.93185924572119e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
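
A minimal sketch of how one might inspect this log offline, assuming the JSON above is saved locally as trainer_state.json and that matplotlib is installed; the field names used (log_history, step, loss, learning_rate) are the ones that appear in the entries above, and the script name is just illustrative.

# inspect_trainer_state.py -- sketch for plotting the loss and learning-rate
# curves recorded in log_history; assumes trainer_state.json sits in the
# current directory.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry holds epoch, grad_norm, learning_rate, loss, step.
history = state["log_history"]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()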