whisper-medium-fleurs-lang-id / trainer_state.json
sanchit-gandhi's picture
End of training
dc9134c
raw
history blame
125 kB
{
"best_metric": 0.8805294322535702,
"best_model_checkpoint": "whisper-medium-fleurs-lang-id/checkpoint-25482",
"epoch": 3.0,
"global_step": 25482,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.2311699047905537e-05,
"loss": 4.577,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 1.4962879132899252e-05,
"loss": 3.8674,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.65137200652793e-05,
"loss": 2.6297,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 1.7536787006730113e-05,
"loss": 1.9219,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.8405856196335375e-05,
"loss": 1.4455,
"step": 125
},
{
"epoch": 0.02,
"learning_rate": 1.9113559186646374e-05,
"loss": 1.1817,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 1.9710538296442596e-05,
"loss": 1.0727,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 2.0207431990532845e-05,
"loss": 0.945,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 2.0664400119026423e-05,
"loss": 0.7437,
"step": 225
},
{
"epoch": 0.03,
"learning_rate": 2.1072552912593918e-05,
"loss": 0.7828,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 2.1441320422849576e-05,
"loss": 0.7152,
"step": 275
},
{
"epoch": 0.04,
"learning_rate": 2.1777639264850062e-05,
"loss": 0.6484,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 2.2086760916995474e-05,
"loss": 0.5657,
"step": 325
},
{
"epoch": 0.04,
"learning_rate": 2.2372756900962695e-05,
"loss": 0.5444,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 2.2638847782608207e-05,
"loss": 0.5612,
"step": 375
},
{
"epoch": 0.05,
"learning_rate": 2.288762494051179e-05,
"loss": 0.5186,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 2.3121204488355324e-05,
"loss": 0.5468,
"step": 425
},
{
"epoch": 0.05,
"learning_rate": 2.333277054469188e-05,
"loss": 0.5036,
"step": 450
},
{
"epoch": 0.06,
"learning_rate": 2.3541374274417174e-05,
"loss": 0.404,
"step": 475
},
{
"epoch": 0.06,
"learning_rate": 2.3739186935221877e-05,
"loss": 0.4239,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 2.3927270271801667e-05,
"loss": 0.407,
"step": 525
},
{
"epoch": 0.06,
"learning_rate": 2.410653664969109e-05,
"loss": 0.4722,
"step": 550
},
{
"epoch": 0.07,
"learning_rate": 2.427777582641219e-05,
"loss": 0.4321,
"step": 575
},
{
"epoch": 0.07,
"learning_rate": 2.4441675981650245e-05,
"loss": 0.4084,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 2.4598840423254854e-05,
"loss": 0.3773,
"step": 625
},
{
"epoch": 0.08,
"learning_rate": 2.4749800994006934e-05,
"loss": 0.3504,
"step": 650
},
{
"epoch": 0.08,
"learning_rate": 2.4895028931404135e-05,
"loss": 0.3285,
"step": 675
},
{
"epoch": 0.08,
"learning_rate": 2.503494373991507e-05,
"loss": 0.3356,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 2.516992049683632e-05,
"loss": 0.3096,
"step": 725
},
{
"epoch": 0.09,
"learning_rate": 2.5300295912321995e-05,
"loss": 0.3191,
"step": 750
},
{
"epoch": 0.09,
"learning_rate": 2.542637339013335e-05,
"loss": 0.362,
"step": 775
},
{
"epoch": 0.09,
"learning_rate": 2.5548427280545428e-05,
"loss": 0.3282,
"step": 800
},
{
"epoch": 0.1,
"learning_rate": 2.5666706475381483e-05,
"loss": 0.3459,
"step": 825
},
{
"epoch": 0.1,
"learning_rate": 2.5781437463637095e-05,
"loss": 0.3781,
"step": 850
},
{
"epoch": 0.1,
"learning_rate": 2.58928269419927e-05,
"loss": 0.295,
"step": 875
},
{
"epoch": 0.11,
"learning_rate": 2.6001064055824197e-05,
"loss": 0.3198,
"step": 900
},
{
"epoch": 0.11,
"learning_rate": 2.61063223317484e-05,
"loss": 0.2867,
"step": 925
},
{
"epoch": 0.11,
"learning_rate": 2.6208761351291982e-05,
"loss": 0.2654,
"step": 950
},
{
"epoch": 0.11,
"learning_rate": 2.6308528206214112e-05,
"loss": 0.269,
"step": 975
},
{
"epoch": 0.12,
"learning_rate": 2.6405758768797805e-05,
"loss": 0.2258,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 2.6500578804641582e-05,
"loss": 0.2228,
"step": 1025
},
{
"epoch": 0.12,
"learning_rate": 2.659310495081882e-05,
"loss": 0.2919,
"step": 1050
},
{
"epoch": 0.13,
"learning_rate": 2.6683445578490447e-05,
"loss": 0.2463,
"step": 1075
},
{
"epoch": 0.13,
"learning_rate": 2.6771701555972537e-05,
"loss": 0.2415,
"step": 1100
},
{
"epoch": 0.13,
"learning_rate": 2.6857966925733306e-05,
"loss": 0.1601,
"step": 1125
},
{
"epoch": 0.14,
"learning_rate": 2.6942329506713114e-05,
"loss": 0.2071,
"step": 1150
},
{
"epoch": 0.14,
"learning_rate": 2.7024871431639188e-05,
"loss": 0.1699,
"step": 1175
},
{
"epoch": 0.14,
"learning_rate": 2.7105669627576403e-05,
"loss": 0.267,
"step": 1200
},
{
"epoch": 0.14,
"learning_rate": 2.7184796246761484e-05,
"loss": 0.2346,
"step": 1225
},
{
"epoch": 0.15,
"learning_rate": 2.7262319053768042e-05,
"loss": 0.2792,
"step": 1250
},
{
"epoch": 0.15,
"learning_rate": 2.7338301774208634e-05,
"loss": 0.1616,
"step": 1275
},
{
"epoch": 0.15,
"learning_rate": 2.741280440946991e-05,
"loss": 0.2769,
"step": 1300
},
{
"epoch": 0.16,
"learning_rate": 2.7485883521375644e-05,
"loss": 0.1969,
"step": 1325
},
{
"epoch": 0.16,
"learning_rate": 2.7557592490160965e-05,
"loss": 0.2361,
"step": 1350
},
{
"epoch": 0.16,
"learning_rate": 2.762798174870535e-05,
"loss": 0.2082,
"step": 1375
},
{
"epoch": 0.16,
"learning_rate": 2.7697098995598905e-05,
"loss": 0.1564,
"step": 1400
},
{
"epoch": 0.17,
"learning_rate": 2.7764989389296562e-05,
"loss": 0.2453,
"step": 1425
},
{
"epoch": 0.17,
"learning_rate": 2.783169572533948e-05,
"loss": 0.2381,
"step": 1450
},
{
"epoch": 0.17,
"learning_rate": 2.7897258598385463e-05,
"loss": 0.2096,
"step": 1475
},
{
"epoch": 0.18,
"learning_rate": 2.7961716550584634e-05,
"loss": 0.2565,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 2.8025106207658357e-05,
"loss": 0.1742,
"step": 1525
},
{
"epoch": 0.18,
"learning_rate": 2.8087462403884467e-05,
"loss": 0.1785,
"step": 1550
},
{
"epoch": 0.19,
"learning_rate": 2.814881829705666e-05,
"loss": 0.1956,
"step": 1575
},
{
"epoch": 0.19,
"learning_rate": 2.820920547436789e-05,
"loss": 0.157,
"step": 1600
},
{
"epoch": 0.19,
"learning_rate": 2.826865405006438e-05,
"loss": 0.2121,
"step": 1625
},
{
"epoch": 0.19,
"learning_rate": 2.832719275562587e-05,
"loss": 0.174,
"step": 1650
},
{
"epoch": 0.2,
"learning_rate": 2.838484902314832e-05,
"loss": 0.2442,
"step": 1675
},
{
"epoch": 0.2,
"learning_rate": 2.8441649062534663e-05,
"loss": 0.1741,
"step": 1700
},
{
"epoch": 0.2,
"learning_rate": 2.8497617933037443e-05,
"loss": 0.1698,
"step": 1725
},
{
"epoch": 0.21,
"learning_rate": 2.8552779609642014e-05,
"loss": 0.1367,
"step": 1750
},
{
"epoch": 0.21,
"learning_rate": 2.8607157044730534e-05,
"loss": 0.1653,
"step": 1775
},
{
"epoch": 0.21,
"learning_rate": 2.866077222542367e-05,
"loss": 0.1203,
"step": 1800
},
{
"epoch": 0.21,
"learning_rate": 2.8713646226958563e-05,
"loss": 0.1568,
"step": 1825
},
{
"epoch": 0.22,
"learning_rate": 2.8765799262427394e-05,
"loss": 0.1497,
"step": 1850
},
{
"epoch": 0.22,
"learning_rate": 2.8817250729170247e-05,
"loss": 0.1457,
"step": 1875
},
{
"epoch": 0.22,
"learning_rate": 2.8868019252088813e-05,
"loss": 0.1452,
"step": 1900
},
{
"epoch": 0.23,
"learning_rate": 2.8918122724122958e-05,
"loss": 0.1305,
"step": 1925
},
{
"epoch": 0.23,
"learning_rate": 2.896757834411026e-05,
"loss": 0.1641,
"step": 1950
},
{
"epoch": 0.23,
"learning_rate": 2.901640265222899e-05,
"loss": 0.1796,
"step": 1975
},
{
"epoch": 0.24,
"learning_rate": 2.9064611563207287e-05,
"loss": 0.1358,
"step": 2000
},
{
"epoch": 0.24,
"learning_rate": 2.9112220397465356e-05,
"loss": 0.0858,
"step": 2025
},
{
"epoch": 0.24,
"learning_rate": 2.9159243910343267e-05,
"loss": 0.1442,
"step": 2050
},
{
"epoch": 0.24,
"learning_rate": 2.9205696319553716e-05,
"loss": 0.1269,
"step": 2075
},
{
"epoch": 0.25,
"learning_rate": 2.9251591330987653e-05,
"loss": 0.1446,
"step": 2100
},
{
"epoch": 0.25,
"learning_rate": 2.929694216298999e-05,
"loss": 0.1286,
"step": 2125
},
{
"epoch": 0.25,
"learning_rate": 2.9341761569212823e-05,
"loss": 0.1438,
"step": 2150
},
{
"epoch": 0.26,
"learning_rate": 2.938606186014522e-05,
"loss": 0.1776,
"step": 2175
},
{
"epoch": 0.26,
"learning_rate": 2.942985492341032e-05,
"loss": 0.1081,
"step": 2200
},
{
"epoch": 0.26,
"learning_rate": 2.9473152242913582e-05,
"loss": 0.132,
"step": 2225
},
{
"epoch": 0.26,
"learning_rate": 2.9515964916919327e-05,
"loss": 0.1263,
"step": 2250
},
{
"epoch": 0.27,
"learning_rate": 2.9558303675126722e-05,
"loss": 0.0987,
"step": 2275
},
{
"epoch": 0.27,
"learning_rate": 2.9600178894810953e-05,
"loss": 0.0705,
"step": 2300
},
{
"epoch": 0.27,
"learning_rate": 2.964160061609038e-05,
"loss": 0.1175,
"step": 2325
},
{
"epoch": 0.28,
"learning_rate": 2.968257855637576e-05,
"loss": 0.1374,
"step": 2350
},
{
"epoch": 0.28,
"learning_rate": 2.972312212405367e-05,
"loss": 0.0856,
"step": 2375
},
{
"epoch": 0.28,
"learning_rate": 2.9763240431452237e-05,
"loss": 0.1072,
"step": 2400
},
{
"epoch": 0.29,
"learning_rate": 2.9802942307133957e-05,
"loss": 0.1096,
"step": 2425
},
{
"epoch": 0.29,
"learning_rate": 2.9842236307557018e-05,
"loss": 0.0835,
"step": 2450
},
{
"epoch": 0.29,
"learning_rate": 2.9881130728143653e-05,
"loss": 0.1439,
"step": 2475
},
{
"epoch": 0.29,
"learning_rate": 2.9919633613791466e-05,
"loss": 0.1044,
"step": 2500
},
{
"epoch": 0.3,
"learning_rate": 2.9957752768860803e-05,
"loss": 0.0727,
"step": 2525
},
{
"epoch": 0.3,
"learning_rate": 2.99954957666695e-05,
"loss": 0.1749,
"step": 2550
},
{
"epoch": 0.3,
"learning_rate": 2.997252867047486e-05,
"loss": 0.1527,
"step": 2575
},
{
"epoch": 0.31,
"learning_rate": 2.993982470675446e-05,
"loss": 0.1003,
"step": 2600
},
{
"epoch": 0.31,
"learning_rate": 2.9907120743034054e-05,
"loss": 0.1395,
"step": 2625
},
{
"epoch": 0.31,
"learning_rate": 2.9874416779313653e-05,
"loss": 0.1295,
"step": 2650
},
{
"epoch": 0.31,
"learning_rate": 2.9841712815593248e-05,
"loss": 0.1136,
"step": 2675
},
{
"epoch": 0.32,
"learning_rate": 2.9809008851872847e-05,
"loss": 0.0809,
"step": 2700
},
{
"epoch": 0.32,
"learning_rate": 2.9776304888152442e-05,
"loss": 0.0928,
"step": 2725
},
{
"epoch": 0.32,
"learning_rate": 2.974360092443204e-05,
"loss": 0.053,
"step": 2750
},
{
"epoch": 0.33,
"learning_rate": 2.9710896960711636e-05,
"loss": 0.0687,
"step": 2775
},
{
"epoch": 0.33,
"learning_rate": 2.9678192996991235e-05,
"loss": 0.1202,
"step": 2800
},
{
"epoch": 0.33,
"learning_rate": 2.9645489033270833e-05,
"loss": 0.1051,
"step": 2825
},
{
"epoch": 0.34,
"learning_rate": 2.961278506955043e-05,
"loss": 0.0758,
"step": 2850
},
{
"epoch": 0.34,
"learning_rate": 2.9580081105830027e-05,
"loss": 0.129,
"step": 2875
},
{
"epoch": 0.34,
"learning_rate": 2.9547377142109623e-05,
"loss": 0.0972,
"step": 2900
},
{
"epoch": 0.34,
"learning_rate": 2.951467317838922e-05,
"loss": 0.1121,
"step": 2925
},
{
"epoch": 0.35,
"learning_rate": 2.948196921466882e-05,
"loss": 0.095,
"step": 2950
},
{
"epoch": 0.35,
"learning_rate": 2.944926525094842e-05,
"loss": 0.0673,
"step": 2975
},
{
"epoch": 0.35,
"learning_rate": 2.9416561287228014e-05,
"loss": 0.0733,
"step": 3000
},
{
"epoch": 0.36,
"learning_rate": 2.9383857323507613e-05,
"loss": 0.0963,
"step": 3025
},
{
"epoch": 0.36,
"learning_rate": 2.9351153359787208e-05,
"loss": 0.1516,
"step": 3050
},
{
"epoch": 0.36,
"learning_rate": 2.9318449396066807e-05,
"loss": 0.1001,
"step": 3075
},
{
"epoch": 0.36,
"learning_rate": 2.9287053590895216e-05,
"loss": 0.1077,
"step": 3100
},
{
"epoch": 0.37,
"learning_rate": 2.9254349627174815e-05,
"loss": 0.0706,
"step": 3125
},
{
"epoch": 0.37,
"learning_rate": 2.9222953822003228e-05,
"loss": 0.1457,
"step": 3150
},
{
"epoch": 0.37,
"learning_rate": 2.9190249858282823e-05,
"loss": 0.0869,
"step": 3175
},
{
"epoch": 0.38,
"learning_rate": 2.9157545894562422e-05,
"loss": 0.1093,
"step": 3200
},
{
"epoch": 0.38,
"learning_rate": 2.9124841930842017e-05,
"loss": 0.1143,
"step": 3225
},
{
"epoch": 0.38,
"learning_rate": 2.9092137967121616e-05,
"loss": 0.1483,
"step": 3250
},
{
"epoch": 0.39,
"learning_rate": 2.905943400340121e-05,
"loss": 0.0691,
"step": 3275
},
{
"epoch": 0.39,
"learning_rate": 2.902673003968081e-05,
"loss": 0.0885,
"step": 3300
},
{
"epoch": 0.39,
"learning_rate": 2.8994026075960405e-05,
"loss": 0.1006,
"step": 3325
},
{
"epoch": 0.39,
"learning_rate": 2.8961322112240004e-05,
"loss": 0.0975,
"step": 3350
},
{
"epoch": 0.4,
"learning_rate": 2.89286181485196e-05,
"loss": 0.0694,
"step": 3375
},
{
"epoch": 0.4,
"learning_rate": 2.8895914184799198e-05,
"loss": 0.0704,
"step": 3400
},
{
"epoch": 0.4,
"learning_rate": 2.8863210221078793e-05,
"loss": 0.1185,
"step": 3425
},
{
"epoch": 0.41,
"learning_rate": 2.8830506257358392e-05,
"loss": 0.1167,
"step": 3450
},
{
"epoch": 0.41,
"learning_rate": 2.8797802293637987e-05,
"loss": 0.0567,
"step": 3475
},
{
"epoch": 0.41,
"learning_rate": 2.8765098329917586e-05,
"loss": 0.0901,
"step": 3500
},
{
"epoch": 0.41,
"learning_rate": 2.873239436619718e-05,
"loss": 0.0927,
"step": 3525
},
{
"epoch": 0.42,
"learning_rate": 2.8699690402476783e-05,
"loss": 0.068,
"step": 3550
},
{
"epoch": 0.42,
"learning_rate": 2.866698643875638e-05,
"loss": 0.0803,
"step": 3575
},
{
"epoch": 0.42,
"learning_rate": 2.8634282475035977e-05,
"loss": 0.134,
"step": 3600
},
{
"epoch": 0.43,
"learning_rate": 2.8601578511315573e-05,
"loss": 0.0849,
"step": 3625
},
{
"epoch": 0.43,
"learning_rate": 2.856887454759517e-05,
"loss": 0.0919,
"step": 3650
},
{
"epoch": 0.43,
"learning_rate": 2.8536170583874767e-05,
"loss": 0.0518,
"step": 3675
},
{
"epoch": 0.44,
"learning_rate": 2.8503466620154365e-05,
"loss": 0.0617,
"step": 3700
},
{
"epoch": 0.44,
"learning_rate": 2.847076265643396e-05,
"loss": 0.0738,
"step": 3725
},
{
"epoch": 0.44,
"learning_rate": 2.843805869271356e-05,
"loss": 0.1036,
"step": 3750
},
{
"epoch": 0.44,
"learning_rate": 2.8405354728993155e-05,
"loss": 0.0582,
"step": 3775
},
{
"epoch": 0.45,
"learning_rate": 2.8372650765272753e-05,
"loss": 0.0377,
"step": 3800
},
{
"epoch": 0.45,
"learning_rate": 2.833994680155235e-05,
"loss": 0.0736,
"step": 3825
},
{
"epoch": 0.45,
"learning_rate": 2.8307242837831947e-05,
"loss": 0.0487,
"step": 3850
},
{
"epoch": 0.46,
"learning_rate": 2.8274538874111543e-05,
"loss": 0.0928,
"step": 3875
},
{
"epoch": 0.46,
"learning_rate": 2.824183491039114e-05,
"loss": 0.0428,
"step": 3900
},
{
"epoch": 0.46,
"learning_rate": 2.8209130946670737e-05,
"loss": 0.0515,
"step": 3925
},
{
"epoch": 0.47,
"learning_rate": 2.8176426982950335e-05,
"loss": 0.0769,
"step": 3950
},
{
"epoch": 0.47,
"learning_rate": 2.814372301922993e-05,
"loss": 0.05,
"step": 3975
},
{
"epoch": 0.47,
"learning_rate": 2.811101905550953e-05,
"loss": 0.0855,
"step": 4000
},
{
"epoch": 0.47,
"learning_rate": 2.8078315091789125e-05,
"loss": 0.0574,
"step": 4025
},
{
"epoch": 0.48,
"learning_rate": 2.8045611128068723e-05,
"loss": 0.1209,
"step": 4050
},
{
"epoch": 0.48,
"learning_rate": 2.801290716434832e-05,
"loss": 0.0862,
"step": 4075
},
{
"epoch": 0.48,
"learning_rate": 2.7980203200627917e-05,
"loss": 0.0739,
"step": 4100
},
{
"epoch": 0.49,
"learning_rate": 2.7947499236907516e-05,
"loss": 0.072,
"step": 4125
},
{
"epoch": 0.49,
"learning_rate": 2.791479527318711e-05,
"loss": 0.0459,
"step": 4150
},
{
"epoch": 0.49,
"learning_rate": 2.788209130946671e-05,
"loss": 0.0624,
"step": 4175
},
{
"epoch": 0.49,
"learning_rate": 2.7849387345746305e-05,
"loss": 0.1005,
"step": 4200
},
{
"epoch": 0.5,
"learning_rate": 2.7816683382025904e-05,
"loss": 0.0836,
"step": 4225
},
{
"epoch": 0.5,
"learning_rate": 2.77839794183055e-05,
"loss": 0.0868,
"step": 4250
},
{
"epoch": 0.5,
"learning_rate": 2.7751275454585098e-05,
"loss": 0.0764,
"step": 4275
},
{
"epoch": 0.51,
"learning_rate": 2.7718571490864693e-05,
"loss": 0.0332,
"step": 4300
},
{
"epoch": 0.51,
"learning_rate": 2.7685867527144292e-05,
"loss": 0.0306,
"step": 4325
},
{
"epoch": 0.51,
"learning_rate": 2.7653163563423887e-05,
"loss": 0.0424,
"step": 4350
},
{
"epoch": 0.52,
"learning_rate": 2.7620459599703486e-05,
"loss": 0.048,
"step": 4375
},
{
"epoch": 0.52,
"learning_rate": 2.758775563598308e-05,
"loss": 0.0594,
"step": 4400
},
{
"epoch": 0.52,
"learning_rate": 2.755505167226268e-05,
"loss": 0.0598,
"step": 4425
},
{
"epoch": 0.52,
"learning_rate": 2.7522347708542275e-05,
"loss": 0.0534,
"step": 4450
},
{
"epoch": 0.53,
"learning_rate": 2.7489643744821874e-05,
"loss": 0.0971,
"step": 4475
},
{
"epoch": 0.53,
"learning_rate": 2.745693978110147e-05,
"loss": 0.0615,
"step": 4500
},
{
"epoch": 0.53,
"learning_rate": 2.7424235817381068e-05,
"loss": 0.0931,
"step": 4525
},
{
"epoch": 0.54,
"learning_rate": 2.7391531853660663e-05,
"loss": 0.1099,
"step": 4550
},
{
"epoch": 0.54,
"learning_rate": 2.7358827889940262e-05,
"loss": 0.0967,
"step": 4575
},
{
"epoch": 0.54,
"learning_rate": 2.7326123926219857e-05,
"loss": 0.1243,
"step": 4600
},
{
"epoch": 0.54,
"learning_rate": 2.7293419962499456e-05,
"loss": 0.0931,
"step": 4625
},
{
"epoch": 0.55,
"learning_rate": 2.726071599877905e-05,
"loss": 0.0966,
"step": 4650
},
{
"epoch": 0.55,
"learning_rate": 2.722801203505865e-05,
"loss": 0.1032,
"step": 4675
},
{
"epoch": 0.55,
"learning_rate": 2.7195308071338245e-05,
"loss": 0.0705,
"step": 4700
},
{
"epoch": 0.56,
"learning_rate": 2.7162604107617844e-05,
"loss": 0.0841,
"step": 4725
},
{
"epoch": 0.56,
"learning_rate": 2.712990014389744e-05,
"loss": 0.0562,
"step": 4750
},
{
"epoch": 0.56,
"learning_rate": 2.7097196180177038e-05,
"loss": 0.0835,
"step": 4775
},
{
"epoch": 0.57,
"learning_rate": 2.7064492216456633e-05,
"loss": 0.0564,
"step": 4800
},
{
"epoch": 0.57,
"learning_rate": 2.7031788252736232e-05,
"loss": 0.025,
"step": 4825
},
{
"epoch": 0.57,
"learning_rate": 2.6999084289015827e-05,
"loss": 0.0352,
"step": 4850
},
{
"epoch": 0.57,
"learning_rate": 2.6966380325295426e-05,
"loss": 0.0926,
"step": 4875
},
{
"epoch": 0.58,
"learning_rate": 2.693367636157502e-05,
"loss": 0.0619,
"step": 4900
},
{
"epoch": 0.58,
"learning_rate": 2.690097239785462e-05,
"loss": 0.0387,
"step": 4925
},
{
"epoch": 0.58,
"learning_rate": 2.6868268434134215e-05,
"loss": 0.0492,
"step": 4950
},
{
"epoch": 0.59,
"learning_rate": 2.6835564470413814e-05,
"loss": 0.0596,
"step": 4975
},
{
"epoch": 0.59,
"learning_rate": 2.680286050669341e-05,
"loss": 0.0866,
"step": 5000
},
{
"epoch": 0.59,
"learning_rate": 2.6770156542973008e-05,
"loss": 0.064,
"step": 5025
},
{
"epoch": 0.59,
"learning_rate": 2.6737452579252603e-05,
"loss": 0.0223,
"step": 5050
},
{
"epoch": 0.6,
"learning_rate": 2.6704748615532202e-05,
"loss": 0.0384,
"step": 5075
},
{
"epoch": 0.6,
"learning_rate": 2.6672044651811797e-05,
"loss": 0.0702,
"step": 5100
},
{
"epoch": 0.6,
"learning_rate": 2.6639340688091396e-05,
"loss": 0.0279,
"step": 5125
},
{
"epoch": 0.61,
"learning_rate": 2.660663672437099e-05,
"loss": 0.0413,
"step": 5150
},
{
"epoch": 0.61,
"learning_rate": 2.657393276065059e-05,
"loss": 0.0905,
"step": 5175
},
{
"epoch": 0.61,
"learning_rate": 2.6542536955479006e-05,
"loss": 0.0512,
"step": 5200
},
{
"epoch": 0.62,
"learning_rate": 2.65098329917586e-05,
"loss": 0.0652,
"step": 5225
},
{
"epoch": 0.62,
"learning_rate": 2.64771290280382e-05,
"loss": 0.0096,
"step": 5250
},
{
"epoch": 0.62,
"learning_rate": 2.644573322286661e-05,
"loss": 0.0297,
"step": 5275
},
{
"epoch": 0.62,
"learning_rate": 2.641302925914621e-05,
"loss": 0.0656,
"step": 5300
},
{
"epoch": 0.63,
"learning_rate": 2.6380325295425807e-05,
"loss": 0.0045,
"step": 5325
},
{
"epoch": 0.63,
"learning_rate": 2.6347621331705403e-05,
"loss": 0.0663,
"step": 5350
},
{
"epoch": 0.63,
"learning_rate": 2.6314917367985e-05,
"loss": 0.0521,
"step": 5375
},
{
"epoch": 0.64,
"learning_rate": 2.6282213404264597e-05,
"loss": 0.0418,
"step": 5400
},
{
"epoch": 0.64,
"learning_rate": 2.6249509440544195e-05,
"loss": 0.0603,
"step": 5425
},
{
"epoch": 0.64,
"learning_rate": 2.621680547682379e-05,
"loss": 0.0538,
"step": 5450
},
{
"epoch": 0.64,
"learning_rate": 2.618410151310339e-05,
"loss": 0.1083,
"step": 5475
},
{
"epoch": 0.65,
"learning_rate": 2.6151397549382985e-05,
"loss": 0.0512,
"step": 5500
},
{
"epoch": 0.65,
"learning_rate": 2.6118693585662583e-05,
"loss": 0.0807,
"step": 5525
},
{
"epoch": 0.65,
"learning_rate": 2.608598962194218e-05,
"loss": 0.0395,
"step": 5550
},
{
"epoch": 0.66,
"learning_rate": 2.6053285658221777e-05,
"loss": 0.0347,
"step": 5575
},
{
"epoch": 0.66,
"learning_rate": 2.6020581694501373e-05,
"loss": 0.0459,
"step": 5600
},
{
"epoch": 0.66,
"learning_rate": 2.598787773078097e-05,
"loss": 0.0841,
"step": 5625
},
{
"epoch": 0.67,
"learning_rate": 2.5955173767060567e-05,
"loss": 0.016,
"step": 5650
},
{
"epoch": 0.67,
"learning_rate": 2.5922469803340165e-05,
"loss": 0.0258,
"step": 5675
},
{
"epoch": 0.67,
"learning_rate": 2.588976583961976e-05,
"loss": 0.0755,
"step": 5700
},
{
"epoch": 0.67,
"learning_rate": 2.585706187589936e-05,
"loss": 0.0592,
"step": 5725
},
{
"epoch": 0.68,
"learning_rate": 2.5824357912178955e-05,
"loss": 0.0366,
"step": 5750
},
{
"epoch": 0.68,
"learning_rate": 2.5791653948458553e-05,
"loss": 0.0503,
"step": 5775
},
{
"epoch": 0.68,
"learning_rate": 2.575894998473815e-05,
"loss": 0.0659,
"step": 5800
},
{
"epoch": 0.69,
"learning_rate": 2.5726246021017747e-05,
"loss": 0.0161,
"step": 5825
},
{
"epoch": 0.69,
"learning_rate": 2.5693542057297346e-05,
"loss": 0.0636,
"step": 5850
},
{
"epoch": 0.69,
"learning_rate": 2.5660838093576945e-05,
"loss": 0.0422,
"step": 5875
},
{
"epoch": 0.69,
"learning_rate": 2.562813412985654e-05,
"loss": 0.05,
"step": 5900
},
{
"epoch": 0.7,
"learning_rate": 2.559543016613614e-05,
"loss": 0.0405,
"step": 5925
},
{
"epoch": 0.7,
"learning_rate": 2.5562726202415734e-05,
"loss": 0.1111,
"step": 5950
},
{
"epoch": 0.7,
"learning_rate": 2.5530022238695333e-05,
"loss": 0.068,
"step": 5975
},
{
"epoch": 0.71,
"learning_rate": 2.5497318274974928e-05,
"loss": 0.0483,
"step": 6000
},
{
"epoch": 0.71,
"learning_rate": 2.5464614311254527e-05,
"loss": 0.088,
"step": 6025
},
{
"epoch": 0.71,
"learning_rate": 2.5431910347534122e-05,
"loss": 0.0624,
"step": 6050
},
{
"epoch": 0.72,
"learning_rate": 2.539920638381372e-05,
"loss": 0.0623,
"step": 6075
},
{
"epoch": 0.72,
"learning_rate": 2.5366502420093316e-05,
"loss": 0.0427,
"step": 6100
},
{
"epoch": 0.72,
"learning_rate": 2.5333798456372915e-05,
"loss": 0.0331,
"step": 6125
},
{
"epoch": 0.72,
"learning_rate": 2.530109449265251e-05,
"loss": 0.0256,
"step": 6150
},
{
"epoch": 0.73,
"learning_rate": 2.526839052893211e-05,
"loss": 0.0386,
"step": 6175
},
{
"epoch": 0.73,
"learning_rate": 2.5235686565211704e-05,
"loss": 0.0702,
"step": 6200
},
{
"epoch": 0.73,
"learning_rate": 2.5202982601491303e-05,
"loss": 0.0206,
"step": 6225
},
{
"epoch": 0.74,
"learning_rate": 2.5170278637770898e-05,
"loss": 0.0622,
"step": 6250
},
{
"epoch": 0.74,
"learning_rate": 2.5137574674050497e-05,
"loss": 0.0574,
"step": 6275
},
{
"epoch": 0.74,
"learning_rate": 2.5104870710330092e-05,
"loss": 0.0381,
"step": 6300
},
{
"epoch": 0.74,
"learning_rate": 2.507216674660969e-05,
"loss": 0.0489,
"step": 6325
},
{
"epoch": 0.75,
"learning_rate": 2.5039462782889286e-05,
"loss": 0.0636,
"step": 6350
},
{
"epoch": 0.75,
"learning_rate": 2.50080669777177e-05,
"loss": 0.0777,
"step": 6375
},
{
"epoch": 0.75,
"learning_rate": 2.4975363013997298e-05,
"loss": 0.0638,
"step": 6400
},
{
"epoch": 0.76,
"learning_rate": 2.4942659050276893e-05,
"loss": 0.0449,
"step": 6425
},
{
"epoch": 0.76,
"learning_rate": 2.490995508655649e-05,
"loss": 0.0384,
"step": 6450
},
{
"epoch": 0.76,
"learning_rate": 2.4877251122836087e-05,
"loss": 0.021,
"step": 6475
},
{
"epoch": 0.77,
"learning_rate": 2.4844547159115686e-05,
"loss": 0.036,
"step": 6500
},
{
"epoch": 0.77,
"learning_rate": 2.481184319539528e-05,
"loss": 0.0136,
"step": 6525
},
{
"epoch": 0.77,
"learning_rate": 2.477913923167488e-05,
"loss": 0.0357,
"step": 6550
},
{
"epoch": 0.77,
"learning_rate": 2.4746435267954475e-05,
"loss": 0.0483,
"step": 6575
},
{
"epoch": 0.78,
"learning_rate": 2.4713731304234074e-05,
"loss": 0.0508,
"step": 6600
},
{
"epoch": 0.78,
"learning_rate": 2.468102734051367e-05,
"loss": 0.0226,
"step": 6625
},
{
"epoch": 0.78,
"learning_rate": 2.4648323376793268e-05,
"loss": 0.0712,
"step": 6650
},
{
"epoch": 0.79,
"learning_rate": 2.4615619413072863e-05,
"loss": 0.0439,
"step": 6675
},
{
"epoch": 0.79,
"learning_rate": 2.458291544935246e-05,
"loss": 0.051,
"step": 6700
},
{
"epoch": 0.79,
"learning_rate": 2.4550211485632057e-05,
"loss": 0.0147,
"step": 6725
},
{
"epoch": 0.79,
"learning_rate": 2.4517507521911656e-05,
"loss": 0.0288,
"step": 6750
},
{
"epoch": 0.8,
"learning_rate": 2.448480355819125e-05,
"loss": 0.091,
"step": 6775
},
{
"epoch": 0.8,
"learning_rate": 2.445209959447085e-05,
"loss": 0.0535,
"step": 6800
},
{
"epoch": 0.8,
"learning_rate": 2.4419395630750445e-05,
"loss": 0.0885,
"step": 6825
},
{
"epoch": 0.81,
"learning_rate": 2.4386691667030044e-05,
"loss": 0.0644,
"step": 6850
},
{
"epoch": 0.81,
"learning_rate": 2.435398770330964e-05,
"loss": 0.0638,
"step": 6875
},
{
"epoch": 0.81,
"learning_rate": 2.4322591898138055e-05,
"loss": 0.0398,
"step": 6900
},
{
"epoch": 0.82,
"learning_rate": 2.4289887934417654e-05,
"loss": 0.0862,
"step": 6925
},
{
"epoch": 0.82,
"learning_rate": 2.425718397069725e-05,
"loss": 0.034,
"step": 6950
},
{
"epoch": 0.82,
"learning_rate": 2.4224480006976848e-05,
"loss": 0.0211,
"step": 6975
},
{
"epoch": 0.82,
"learning_rate": 2.4191776043256443e-05,
"loss": 0.0469,
"step": 7000
},
{
"epoch": 0.83,
"learning_rate": 2.4159072079536042e-05,
"loss": 0.0347,
"step": 7025
},
{
"epoch": 0.83,
"learning_rate": 2.4126368115815637e-05,
"loss": 0.0452,
"step": 7050
},
{
"epoch": 0.83,
"learning_rate": 2.4093664152095236e-05,
"loss": 0.041,
"step": 7075
},
{
"epoch": 0.84,
"learning_rate": 2.406096018837483e-05,
"loss": 0.0611,
"step": 7100
},
{
"epoch": 0.84,
"learning_rate": 2.402825622465443e-05,
"loss": 0.0373,
"step": 7125
},
{
"epoch": 0.84,
"learning_rate": 2.3995552260934025e-05,
"loss": 0.0206,
"step": 7150
},
{
"epoch": 0.84,
"learning_rate": 2.3962848297213624e-05,
"loss": 0.0521,
"step": 7175
},
{
"epoch": 0.85,
"learning_rate": 2.393014433349322e-05,
"loss": 0.0714,
"step": 7200
},
{
"epoch": 0.85,
"learning_rate": 2.3897440369772818e-05,
"loss": 0.0474,
"step": 7225
},
{
"epoch": 0.85,
"learning_rate": 2.3864736406052413e-05,
"loss": 0.0456,
"step": 7250
},
{
"epoch": 0.86,
"learning_rate": 2.3832032442332012e-05,
"loss": 0.029,
"step": 7275
},
{
"epoch": 0.86,
"learning_rate": 2.3799328478611607e-05,
"loss": 0.0293,
"step": 7300
},
{
"epoch": 0.86,
"learning_rate": 2.3766624514891206e-05,
"loss": 0.0381,
"step": 7325
},
{
"epoch": 0.87,
"learning_rate": 2.37339205511708e-05,
"loss": 0.0316,
"step": 7350
},
{
"epoch": 0.87,
"learning_rate": 2.37012165874504e-05,
"loss": 0.0262,
"step": 7375
},
{
"epoch": 0.87,
"learning_rate": 2.3668512623729995e-05,
"loss": 0.0528,
"step": 7400
},
{
"epoch": 0.87,
"learning_rate": 2.3635808660009594e-05,
"loss": 0.0351,
"step": 7425
},
{
"epoch": 0.88,
"learning_rate": 2.360310469628919e-05,
"loss": 0.0767,
"step": 7450
},
{
"epoch": 0.88,
"learning_rate": 2.3570400732568788e-05,
"loss": 0.0353,
"step": 7475
},
{
"epoch": 0.88,
"learning_rate": 2.3537696768848383e-05,
"loss": 0.0405,
"step": 7500
},
{
"epoch": 0.89,
"learning_rate": 2.3504992805127982e-05,
"loss": 0.0455,
"step": 7525
},
{
"epoch": 0.89,
"learning_rate": 2.3472288841407577e-05,
"loss": 0.0711,
"step": 7550
},
{
"epoch": 0.89,
"learning_rate": 2.3439584877687176e-05,
"loss": 0.048,
"step": 7575
},
{
"epoch": 0.89,
"learning_rate": 2.340688091396677e-05,
"loss": 0.036,
"step": 7600
},
{
"epoch": 0.9,
"learning_rate": 2.337417695024637e-05,
"loss": 0.032,
"step": 7625
},
{
"epoch": 0.9,
"learning_rate": 2.3341472986525965e-05,
"loss": 0.0352,
"step": 7650
},
{
"epoch": 0.9,
"learning_rate": 2.3308769022805564e-05,
"loss": 0.0388,
"step": 7675
},
{
"epoch": 0.91,
"learning_rate": 2.3276065059085163e-05,
"loss": 0.0437,
"step": 7700
},
{
"epoch": 0.91,
"learning_rate": 2.3243361095364758e-05,
"loss": 0.0367,
"step": 7725
},
{
"epoch": 0.91,
"learning_rate": 2.3210657131644357e-05,
"loss": 0.0236,
"step": 7750
},
{
"epoch": 0.92,
"learning_rate": 2.3177953167923952e-05,
"loss": 0.0422,
"step": 7775
},
{
"epoch": 0.92,
"learning_rate": 2.314524920420355e-05,
"loss": 0.0157,
"step": 7800
},
{
"epoch": 0.92,
"learning_rate": 2.3112545240483146e-05,
"loss": 0.0475,
"step": 7825
},
{
"epoch": 0.92,
"learning_rate": 2.3079841276762745e-05,
"loss": 0.0128,
"step": 7850
},
{
"epoch": 0.93,
"learning_rate": 2.304713731304234e-05,
"loss": 0.0595,
"step": 7875
},
{
"epoch": 0.93,
"learning_rate": 2.301443334932194e-05,
"loss": 0.0211,
"step": 7900
},
{
"epoch": 0.93,
"learning_rate": 2.298303754415035e-05,
"loss": 0.0526,
"step": 7925
},
{
"epoch": 0.94,
"learning_rate": 2.295033358042995e-05,
"loss": 0.0532,
"step": 7950
},
{
"epoch": 0.94,
"learning_rate": 2.2917629616709546e-05,
"loss": 0.0373,
"step": 7975
},
{
"epoch": 0.94,
"learning_rate": 2.2884925652989144e-05,
"loss": 0.0519,
"step": 8000
},
{
"epoch": 0.94,
"learning_rate": 2.285222168926874e-05,
"loss": 0.0144,
"step": 8025
},
{
"epoch": 0.95,
"learning_rate": 2.2819517725548338e-05,
"loss": 0.0295,
"step": 8050
},
{
"epoch": 0.95,
"learning_rate": 2.2786813761827934e-05,
"loss": 0.0468,
"step": 8075
},
{
"epoch": 0.95,
"learning_rate": 2.2754109798107532e-05,
"loss": 0.0503,
"step": 8100
},
{
"epoch": 0.96,
"learning_rate": 2.2721405834387128e-05,
"loss": 0.045,
"step": 8125
},
{
"epoch": 0.96,
"learning_rate": 2.2688701870666726e-05,
"loss": 0.0605,
"step": 8150
},
{
"epoch": 0.96,
"learning_rate": 2.265599790694632e-05,
"loss": 0.0393,
"step": 8175
},
{
"epoch": 0.97,
"learning_rate": 2.262329394322592e-05,
"loss": 0.0173,
"step": 8200
},
{
"epoch": 0.97,
"learning_rate": 2.2590589979505516e-05,
"loss": 0.0301,
"step": 8225
},
{
"epoch": 0.97,
"learning_rate": 2.2557886015785114e-05,
"loss": 0.0541,
"step": 8250
},
{
"epoch": 0.97,
"learning_rate": 2.2526490210613527e-05,
"loss": 0.019,
"step": 8275
},
{
"epoch": 0.98,
"learning_rate": 2.2493786246893123e-05,
"loss": 0.0348,
"step": 8300
},
{
"epoch": 0.98,
"learning_rate": 2.246108228317272e-05,
"loss": 0.0299,
"step": 8325
},
{
"epoch": 0.98,
"learning_rate": 2.2428378319452317e-05,
"loss": 0.0431,
"step": 8350
},
{
"epoch": 0.99,
"learning_rate": 2.2395674355731915e-05,
"loss": 0.0196,
"step": 8375
},
{
"epoch": 0.99,
"learning_rate": 2.236297039201151e-05,
"loss": 0.0341,
"step": 8400
},
{
"epoch": 0.99,
"learning_rate": 2.233026642829111e-05,
"loss": 0.0199,
"step": 8425
},
{
"epoch": 0.99,
"learning_rate": 2.2297562464570705e-05,
"loss": 0.0276,
"step": 8450
},
{
"epoch": 1.0,
"learning_rate": 2.2264858500850303e-05,
"loss": 0.0152,
"step": 8475
},
{
"epoch": 1.0,
"eval_accuracy": 0.8430570068501103,
"eval_loss": 0.90869140625,
"eval_runtime": 4380.2814,
"eval_samples_per_second": 7.865,
"eval_steps_per_second": 0.246,
"step": 8494
},
{
"epoch": 1.0,
"learning_rate": 2.22321545371299e-05,
"loss": 0.042,
"step": 8500
},
{
"epoch": 1.0,
"learning_rate": 2.2199450573409497e-05,
"loss": 0.0308,
"step": 8525
},
{
"epoch": 1.01,
"learning_rate": 2.2166746609689093e-05,
"loss": 0.0046,
"step": 8550
},
{
"epoch": 1.01,
"learning_rate": 2.213404264596869e-05,
"loss": 0.0261,
"step": 8575
},
{
"epoch": 1.01,
"learning_rate": 2.2101338682248287e-05,
"loss": 0.0074,
"step": 8600
},
{
"epoch": 1.02,
"learning_rate": 2.2068634718527885e-05,
"loss": 0.0585,
"step": 8625
},
{
"epoch": 1.02,
"learning_rate": 2.203593075480748e-05,
"loss": 0.0461,
"step": 8650
},
{
"epoch": 1.02,
"learning_rate": 2.200322679108708e-05,
"loss": 0.0518,
"step": 8675
},
{
"epoch": 1.02,
"learning_rate": 2.1970522827366674e-05,
"loss": 0.0276,
"step": 8700
},
{
"epoch": 1.03,
"learning_rate": 2.1937818863646273e-05,
"loss": 0.01,
"step": 8725
},
{
"epoch": 1.03,
"learning_rate": 2.190511489992587e-05,
"loss": 0.026,
"step": 8750
},
{
"epoch": 1.03,
"learning_rate": 2.187241093620547e-05,
"loss": 0.048,
"step": 8775
},
{
"epoch": 1.04,
"learning_rate": 2.1839706972485066e-05,
"loss": 0.0211,
"step": 8800
},
{
"epoch": 1.04,
"learning_rate": 2.1807003008764665e-05,
"loss": 0.0485,
"step": 8825
},
{
"epoch": 1.04,
"learning_rate": 2.177429904504426e-05,
"loss": 0.0152,
"step": 8850
},
{
"epoch": 1.04,
"learning_rate": 2.174159508132386e-05,
"loss": 0.0235,
"step": 8875
},
{
"epoch": 1.05,
"learning_rate": 2.1708891117603454e-05,
"loss": 0.0067,
"step": 8900
},
{
"epoch": 1.05,
"learning_rate": 2.1676187153883053e-05,
"loss": 0.0406,
"step": 8925
},
{
"epoch": 1.05,
"learning_rate": 2.1643483190162648e-05,
"loss": 0.0269,
"step": 8950
},
{
"epoch": 1.06,
"learning_rate": 2.1610779226442247e-05,
"loss": 0.0167,
"step": 8975
},
{
"epoch": 1.06,
"learning_rate": 2.1578075262721845e-05,
"loss": 0.0426,
"step": 9000
},
{
"epoch": 1.06,
"learning_rate": 2.154537129900144e-05,
"loss": 0.0175,
"step": 9025
},
{
"epoch": 1.07,
"learning_rate": 2.151266733528104e-05,
"loss": 0.0445,
"step": 9050
},
{
"epoch": 1.07,
"learning_rate": 2.1479963371560635e-05,
"loss": 0.0028,
"step": 9075
},
{
"epoch": 1.07,
"learning_rate": 2.1447259407840233e-05,
"loss": 0.046,
"step": 9100
},
{
"epoch": 1.07,
"learning_rate": 2.141455544411983e-05,
"loss": 0.0443,
"step": 9125
},
{
"epoch": 1.08,
"learning_rate": 2.1381851480399427e-05,
"loss": 0.026,
"step": 9150
},
{
"epoch": 1.08,
"learning_rate": 2.1349147516679023e-05,
"loss": 0.029,
"step": 9175
},
{
"epoch": 1.08,
"learning_rate": 2.131644355295862e-05,
"loss": 0.0131,
"step": 9200
},
{
"epoch": 1.09,
"learning_rate": 2.1283739589238217e-05,
"loss": 0.0164,
"step": 9225
},
{
"epoch": 1.09,
"learning_rate": 2.1251035625517815e-05,
"loss": 0.0219,
"step": 9250
},
{
"epoch": 1.09,
"learning_rate": 2.121833166179741e-05,
"loss": 0.0171,
"step": 9275
},
{
"epoch": 1.09,
"learning_rate": 2.118562769807701e-05,
"loss": 0.013,
"step": 9300
},
{
"epoch": 1.1,
"learning_rate": 2.1152923734356605e-05,
"loss": 0.0433,
"step": 9325
},
{
"epoch": 1.1,
"learning_rate": 2.1120219770636203e-05,
"loss": 0.0235,
"step": 9350
},
{
"epoch": 1.1,
"learning_rate": 2.10875158069158e-05,
"loss": 0.0227,
"step": 9375
},
{
"epoch": 1.11,
"learning_rate": 2.1054811843195397e-05,
"loss": 0.0157,
"step": 9400
},
{
"epoch": 1.11,
"learning_rate": 2.1022107879474993e-05,
"loss": 0.0358,
"step": 9425
},
{
"epoch": 1.11,
"learning_rate": 2.098940391575459e-05,
"loss": 0.0135,
"step": 9450
},
{
"epoch": 1.12,
"learning_rate": 2.0958008110583e-05,
"loss": 0.0248,
"step": 9475
},
{
"epoch": 1.12,
"learning_rate": 2.09253041468626e-05,
"loss": 0.0432,
"step": 9500
},
{
"epoch": 1.12,
"learning_rate": 2.0892600183142195e-05,
"loss": 0.005,
"step": 9525
},
{
"epoch": 1.12,
"learning_rate": 2.0859896219421794e-05,
"loss": 0.0066,
"step": 9550
},
{
"epoch": 1.13,
"learning_rate": 2.0827192255701392e-05,
"loss": 0.045,
"step": 9575
},
{
"epoch": 1.13,
"learning_rate": 2.0794488291980988e-05,
"loss": 0.0386,
"step": 9600
},
{
"epoch": 1.13,
"learning_rate": 2.0763092486809404e-05,
"loss": 0.0592,
"step": 9625
},
{
"epoch": 1.14,
"learning_rate": 2.0730388523089e-05,
"loss": 0.0084,
"step": 9650
},
{
"epoch": 1.14,
"learning_rate": 2.0697684559368598e-05,
"loss": 0.0309,
"step": 9675
},
{
"epoch": 1.14,
"learning_rate": 2.0664980595648193e-05,
"loss": 0.0309,
"step": 9700
},
{
"epoch": 1.14,
"learning_rate": 2.0632276631927792e-05,
"loss": 0.0138,
"step": 9725
},
{
"epoch": 1.15,
"learning_rate": 2.0599572668207387e-05,
"loss": 0.0131,
"step": 9750
},
{
"epoch": 1.15,
"learning_rate": 2.0566868704486986e-05,
"loss": 0.0432,
"step": 9775
},
{
"epoch": 1.15,
"learning_rate": 2.053416474076658e-05,
"loss": 0.0379,
"step": 9800
},
{
"epoch": 1.16,
"learning_rate": 2.050146077704618e-05,
"loss": 0.0016,
"step": 9825
},
{
"epoch": 1.16,
"learning_rate": 2.0468756813325775e-05,
"loss": 0.0143,
"step": 9850
},
{
"epoch": 1.16,
"learning_rate": 2.0436052849605374e-05,
"loss": 0.0362,
"step": 9875
},
{
"epoch": 1.17,
"learning_rate": 2.040334888588497e-05,
"loss": 0.0238,
"step": 9900
},
{
"epoch": 1.17,
"learning_rate": 2.0370644922164568e-05,
"loss": 0.0526,
"step": 9925
},
{
"epoch": 1.17,
"learning_rate": 2.0337940958444163e-05,
"loss": 0.0022,
"step": 9950
},
{
"epoch": 1.17,
"learning_rate": 2.0305236994723762e-05,
"loss": 0.0391,
"step": 9975
},
{
"epoch": 1.18,
"learning_rate": 2.0272533031003357e-05,
"loss": 0.0103,
"step": 10000
},
{
"epoch": 1.18,
"learning_rate": 2.0239829067282956e-05,
"loss": 0.0261,
"step": 10025
},
{
"epoch": 1.18,
"learning_rate": 2.020712510356255e-05,
"loss": 0.0271,
"step": 10050
},
{
"epoch": 1.19,
"learning_rate": 2.017442113984215e-05,
"loss": 0.0169,
"step": 10075
},
{
"epoch": 1.19,
"learning_rate": 2.0141717176121745e-05,
"loss": 0.0358,
"step": 10100
},
{
"epoch": 1.19,
"learning_rate": 2.0109013212401344e-05,
"loss": 0.0077,
"step": 10125
},
{
"epoch": 1.19,
"learning_rate": 2.007630924868094e-05,
"loss": 0.0231,
"step": 10150
},
{
"epoch": 1.2,
"learning_rate": 2.0043605284960538e-05,
"loss": 0.0168,
"step": 10175
},
{
"epoch": 1.2,
"learning_rate": 2.0010901321240133e-05,
"loss": 0.0181,
"step": 10200
},
{
"epoch": 1.2,
"learning_rate": 1.9978197357519732e-05,
"loss": 0.023,
"step": 10225
},
{
"epoch": 1.21,
"learning_rate": 1.994549339379933e-05,
"loss": 0.0242,
"step": 10250
},
{
"epoch": 1.21,
"learning_rate": 1.9912789430078926e-05,
"loss": 0.0287,
"step": 10275
},
{
"epoch": 1.21,
"learning_rate": 1.9880085466358525e-05,
"loss": 0.0177,
"step": 10300
},
{
"epoch": 1.22,
"learning_rate": 1.984738150263812e-05,
"loss": 0.0249,
"step": 10325
},
{
"epoch": 1.22,
"learning_rate": 1.981467753891772e-05,
"loss": 0.0396,
"step": 10350
},
{
"epoch": 1.22,
"learning_rate": 1.9781973575197314e-05,
"loss": 0.0487,
"step": 10375
},
{
"epoch": 1.22,
"learning_rate": 1.9749269611476913e-05,
"loss": 0.0111,
"step": 10400
},
{
"epoch": 1.23,
"learning_rate": 1.9716565647756508e-05,
"loss": 0.0259,
"step": 10425
},
{
"epoch": 1.23,
"learning_rate": 1.9683861684036107e-05,
"loss": 0.0303,
"step": 10450
},
{
"epoch": 1.23,
"learning_rate": 1.9651157720315702e-05,
"loss": 0.0229,
"step": 10475
},
{
"epoch": 1.24,
"learning_rate": 1.96184537565953e-05,
"loss": 0.0234,
"step": 10500
},
{
"epoch": 1.24,
"learning_rate": 1.9585749792874896e-05,
"loss": 0.0054,
"step": 10525
},
{
"epoch": 1.24,
"learning_rate": 1.9553045829154495e-05,
"loss": 0.0141,
"step": 10550
},
{
"epoch": 1.24,
"learning_rate": 1.952034186543409e-05,
"loss": 0.0013,
"step": 10575
},
{
"epoch": 1.25,
"learning_rate": 1.948763790171369e-05,
"loss": 0.0076,
"step": 10600
},
{
"epoch": 1.25,
"learning_rate": 1.9454933937993284e-05,
"loss": 0.0235,
"step": 10625
},
{
"epoch": 1.25,
"learning_rate": 1.9422229974272883e-05,
"loss": 0.0341,
"step": 10650
},
{
"epoch": 1.26,
"learning_rate": 1.9389526010552478e-05,
"loss": 0.0688,
"step": 10675
},
{
"epoch": 1.26,
"learning_rate": 1.9356822046832077e-05,
"loss": 0.0339,
"step": 10700
},
{
"epoch": 1.26,
"learning_rate": 1.9324118083111672e-05,
"loss": 0.0162,
"step": 10725
},
{
"epoch": 1.27,
"learning_rate": 1.929141411939127e-05,
"loss": 0.0126,
"step": 10750
},
{
"epoch": 1.27,
"learning_rate": 1.9258710155670866e-05,
"loss": 0.0151,
"step": 10775
},
{
"epoch": 1.27,
"learning_rate": 1.9226006191950464e-05,
"loss": 0.0093,
"step": 10800
},
{
"epoch": 1.27,
"learning_rate": 1.919330222823006e-05,
"loss": 0.014,
"step": 10825
},
{
"epoch": 1.28,
"learning_rate": 1.916059826450966e-05,
"loss": 0.0327,
"step": 10850
},
{
"epoch": 1.28,
"learning_rate": 1.9127894300789254e-05,
"loss": 0.019,
"step": 10875
},
{
"epoch": 1.28,
"learning_rate": 1.9095190337068852e-05,
"loss": 0.0202,
"step": 10900
},
{
"epoch": 1.29,
"learning_rate": 1.9062486373348448e-05,
"loss": 0.0212,
"step": 10925
},
{
"epoch": 1.29,
"learning_rate": 1.9029782409628046e-05,
"loss": 0.0096,
"step": 10950
},
{
"epoch": 1.29,
"learning_rate": 1.8997078445907642e-05,
"loss": 0.0184,
"step": 10975
},
{
"epoch": 1.3,
"learning_rate": 1.896437448218724e-05,
"loss": 0.0008,
"step": 11000
},
{
"epoch": 1.3,
"learning_rate": 1.8931670518466836e-05,
"loss": 0.01,
"step": 11025
},
{
"epoch": 1.3,
"learning_rate": 1.8898966554746434e-05,
"loss": 0.014,
"step": 11050
},
{
"epoch": 1.3,
"learning_rate": 1.8866262591026033e-05,
"loss": 0.025,
"step": 11075
},
{
"epoch": 1.31,
"learning_rate": 1.8833558627305632e-05,
"loss": 0.0279,
"step": 11100
},
{
"epoch": 1.31,
"learning_rate": 1.8800854663585227e-05,
"loss": 0.0175,
"step": 11125
},
{
"epoch": 1.31,
"learning_rate": 1.8768150699864826e-05,
"loss": 0.0102,
"step": 11150
},
{
"epoch": 1.32,
"learning_rate": 1.873544673614442e-05,
"loss": 0.0316,
"step": 11175
},
{
"epoch": 1.32,
"learning_rate": 1.870274277242402e-05,
"loss": 0.0179,
"step": 11200
},
{
"epoch": 1.32,
"learning_rate": 1.8670038808703615e-05,
"loss": 0.055,
"step": 11225
},
{
"epoch": 1.32,
"learning_rate": 1.8637334844983214e-05,
"loss": 0.0191,
"step": 11250
},
{
"epoch": 1.33,
"learning_rate": 1.860463088126281e-05,
"loss": 0.0177,
"step": 11275
},
{
"epoch": 1.33,
"learning_rate": 1.8571926917542408e-05,
"loss": 0.0191,
"step": 11300
},
{
"epoch": 1.33,
"learning_rate": 1.8539222953822003e-05,
"loss": 0.0154,
"step": 11325
},
{
"epoch": 1.34,
"learning_rate": 1.8506518990101602e-05,
"loss": 0.0047,
"step": 11350
},
{
"epoch": 1.34,
"learning_rate": 1.84738150263812e-05,
"loss": 0.0061,
"step": 11375
},
{
"epoch": 1.34,
"learning_rate": 1.8441111062660796e-05,
"loss": 0.0135,
"step": 11400
},
{
"epoch": 1.35,
"learning_rate": 1.840971525748921e-05,
"loss": 0.0377,
"step": 11425
},
{
"epoch": 1.35,
"learning_rate": 1.8377011293768804e-05,
"loss": 0.0232,
"step": 11450
},
{
"epoch": 1.35,
"learning_rate": 1.8344307330048403e-05,
"loss": 0.0135,
"step": 11475
},
{
"epoch": 1.35,
"learning_rate": 1.8311603366327998e-05,
"loss": 0.0331,
"step": 11500
},
{
"epoch": 1.36,
"learning_rate": 1.8278899402607597e-05,
"loss": 0.0352,
"step": 11525
},
{
"epoch": 1.36,
"learning_rate": 1.8246195438887192e-05,
"loss": 0.0363,
"step": 11550
},
{
"epoch": 1.36,
"learning_rate": 1.8214799633715605e-05,
"loss": 0.029,
"step": 11575
},
{
"epoch": 1.37,
"learning_rate": 1.8182095669995204e-05,
"loss": 0.0266,
"step": 11600
},
{
"epoch": 1.37,
"learning_rate": 1.81493917062748e-05,
"loss": 0.0175,
"step": 11625
},
{
"epoch": 1.37,
"learning_rate": 1.8116687742554398e-05,
"loss": 0.0137,
"step": 11650
},
{
"epoch": 1.37,
"learning_rate": 1.8083983778833996e-05,
"loss": 0.0156,
"step": 11675
},
{
"epoch": 1.38,
"learning_rate": 1.8051279815113595e-05,
"loss": 0.0354,
"step": 11700
},
{
"epoch": 1.38,
"learning_rate": 1.801857585139319e-05,
"loss": 0.0023,
"step": 11725
},
{
"epoch": 1.38,
"learning_rate": 1.798587188767279e-05,
"loss": 0.0453,
"step": 11750
},
{
"epoch": 1.39,
"learning_rate": 1.7953167923952384e-05,
"loss": 0.0431,
"step": 11775
},
{
"epoch": 1.39,
"learning_rate": 1.7920463960231983e-05,
"loss": 0.0203,
"step": 11800
},
{
"epoch": 1.39,
"learning_rate": 1.788775999651158e-05,
"loss": 0.0459,
"step": 11825
},
{
"epoch": 1.4,
"learning_rate": 1.7855056032791177e-05,
"loss": 0.0236,
"step": 11850
},
{
"epoch": 1.4,
"learning_rate": 1.7822352069070772e-05,
"loss": 0.0357,
"step": 11875
},
{
"epoch": 1.4,
"learning_rate": 1.778964810535037e-05,
"loss": 0.0542,
"step": 11900
},
{
"epoch": 1.4,
"learning_rate": 1.7756944141629966e-05,
"loss": 0.0044,
"step": 11925
},
{
"epoch": 1.41,
"learning_rate": 1.7724240177909565e-05,
"loss": 0.0068,
"step": 11950
},
{
"epoch": 1.41,
"learning_rate": 1.769153621418916e-05,
"loss": 0.0346,
"step": 11975
},
{
"epoch": 1.41,
"learning_rate": 1.765883225046876e-05,
"loss": 0.0473,
"step": 12000
},
{
"epoch": 1.42,
"learning_rate": 1.7626128286748354e-05,
"loss": 0.0049,
"step": 12025
},
{
"epoch": 1.42,
"learning_rate": 1.7593424323027953e-05,
"loss": 0.0054,
"step": 12050
},
{
"epoch": 1.42,
"learning_rate": 1.756072035930755e-05,
"loss": 0.0349,
"step": 12075
},
{
"epoch": 1.42,
"learning_rate": 1.7528016395587147e-05,
"loss": 0.0147,
"step": 12100
},
{
"epoch": 1.43,
"learning_rate": 1.7495312431866742e-05,
"loss": 0.0049,
"step": 12125
},
{
"epoch": 1.43,
"learning_rate": 1.746260846814634e-05,
"loss": 0.0142,
"step": 12150
},
{
"epoch": 1.43,
"learning_rate": 1.7429904504425936e-05,
"loss": 0.0004,
"step": 12175
},
{
"epoch": 1.44,
"learning_rate": 1.7397200540705535e-05,
"loss": 0.0124,
"step": 12200
},
{
"epoch": 1.44,
"learning_rate": 1.736449657698513e-05,
"loss": 0.0023,
"step": 12225
},
{
"epoch": 1.44,
"learning_rate": 1.733179261326473e-05,
"loss": 0.0403,
"step": 12250
},
{
"epoch": 1.45,
"learning_rate": 1.7299088649544324e-05,
"loss": 0.0039,
"step": 12275
},
{
"epoch": 1.45,
"learning_rate": 1.7266384685823923e-05,
"loss": 0.0258,
"step": 12300
},
{
"epoch": 1.45,
"learning_rate": 1.723368072210352e-05,
"loss": 0.0134,
"step": 12325
},
{
"epoch": 1.45,
"learning_rate": 1.7200976758383117e-05,
"loss": 0.0229,
"step": 12350
},
{
"epoch": 1.46,
"learning_rate": 1.7168272794662712e-05,
"loss": 0.0136,
"step": 12375
},
{
"epoch": 1.46,
"learning_rate": 1.713556883094231e-05,
"loss": 0.0042,
"step": 12400
},
{
"epoch": 1.46,
"learning_rate": 1.7102864867221906e-05,
"loss": 0.0187,
"step": 12425
},
{
"epoch": 1.47,
"learning_rate": 1.7070160903501505e-05,
"loss": 0.0113,
"step": 12450
},
{
"epoch": 1.47,
"learning_rate": 1.70374569397811e-05,
"loss": 0.0002,
"step": 12475
},
{
"epoch": 1.47,
"learning_rate": 1.70047529760607e-05,
"loss": 0.0141,
"step": 12500
},
{
"epoch": 1.47,
"learning_rate": 1.6972049012340294e-05,
"loss": 0.0435,
"step": 12525
},
{
"epoch": 1.48,
"learning_rate": 1.6939345048619893e-05,
"loss": 0.0111,
"step": 12550
},
{
"epoch": 1.48,
"learning_rate": 1.690664108489949e-05,
"loss": 0.0072,
"step": 12575
},
{
"epoch": 1.48,
"learning_rate": 1.6873937121179087e-05,
"loss": 0.0368,
"step": 12600
},
{
"epoch": 1.49,
"learning_rate": 1.6841233157458686e-05,
"loss": 0.0303,
"step": 12625
},
{
"epoch": 1.49,
"learning_rate": 1.680852919373828e-05,
"loss": 0.022,
"step": 12650
},
{
"epoch": 1.49,
"learning_rate": 1.6777133388566694e-05,
"loss": 0.0275,
"step": 12675
},
{
"epoch": 1.5,
"learning_rate": 1.674442942484629e-05,
"loss": 0.0332,
"step": 12700
},
{
"epoch": 1.5,
"learning_rate": 1.6711725461125888e-05,
"loss": 0.009,
"step": 12725
},
{
"epoch": 1.5,
"learning_rate": 1.6679021497405483e-05,
"loss": 0.0261,
"step": 12750
},
{
"epoch": 1.5,
"learning_rate": 1.6646317533685082e-05,
"loss": 0.0188,
"step": 12775
},
{
"epoch": 1.51,
"learning_rate": 1.6613613569964677e-05,
"loss": 0.0253,
"step": 12800
},
{
"epoch": 1.51,
"learning_rate": 1.658090960624428e-05,
"loss": 0.006,
"step": 12825
},
{
"epoch": 1.51,
"learning_rate": 1.6548205642523875e-05,
"loss": 0.0057,
"step": 12850
},
{
"epoch": 1.52,
"learning_rate": 1.6515501678803473e-05,
"loss": 0.0067,
"step": 12875
},
{
"epoch": 1.52,
"learning_rate": 1.648279771508307e-05,
"loss": 0.0369,
"step": 12900
},
{
"epoch": 1.52,
"learning_rate": 1.6450093751362667e-05,
"loss": 0.0171,
"step": 12925
},
{
"epoch": 1.52,
"learning_rate": 1.6417389787642263e-05,
"loss": 0.0276,
"step": 12950
},
{
"epoch": 1.53,
"learning_rate": 1.6385993982470676e-05,
"loss": 0.0246,
"step": 12975
},
{
"epoch": 1.53,
"learning_rate": 1.6353290018750274e-05,
"loss": 0.0508,
"step": 13000
},
{
"epoch": 1.53,
"learning_rate": 1.632058605502987e-05,
"loss": 0.0333,
"step": 13025
},
{
"epoch": 1.54,
"learning_rate": 1.628788209130947e-05,
"loss": 0.0104,
"step": 13050
},
{
"epoch": 1.54,
"learning_rate": 1.6255178127589064e-05,
"loss": 0.0062,
"step": 13075
},
{
"epoch": 1.54,
"learning_rate": 1.6222474163868662e-05,
"loss": 0.0266,
"step": 13100
},
{
"epoch": 1.55,
"learning_rate": 1.6189770200148258e-05,
"loss": 0.0194,
"step": 13125
},
{
"epoch": 1.55,
"learning_rate": 1.6157066236427856e-05,
"loss": 0.0214,
"step": 13150
},
{
"epoch": 1.55,
"learning_rate": 1.6124362272707452e-05,
"loss": 0.0171,
"step": 13175
},
{
"epoch": 1.55,
"learning_rate": 1.609165830898705e-05,
"loss": 0.0265,
"step": 13200
},
{
"epoch": 1.56,
"learning_rate": 1.6058954345266646e-05,
"loss": 0.0192,
"step": 13225
},
{
"epoch": 1.56,
"learning_rate": 1.6026250381546244e-05,
"loss": 0.0083,
"step": 13250
},
{
"epoch": 1.56,
"learning_rate": 1.599354641782584e-05,
"loss": 0.0024,
"step": 13275
},
{
"epoch": 1.57,
"learning_rate": 1.596084245410544e-05,
"loss": 0.0117,
"step": 13300
},
{
"epoch": 1.57,
"learning_rate": 1.5928138490385034e-05,
"loss": 0.0261,
"step": 13325
},
{
"epoch": 1.57,
"learning_rate": 1.5895434526664632e-05,
"loss": 0.015,
"step": 13350
},
{
"epoch": 1.57,
"learning_rate": 1.5862730562944228e-05,
"loss": 0.01,
"step": 13375
},
{
"epoch": 1.58,
"learning_rate": 1.5830026599223826e-05,
"loss": 0.0016,
"step": 13400
},
{
"epoch": 1.58,
"learning_rate": 1.5797322635503422e-05,
"loss": 0.0236,
"step": 13425
},
{
"epoch": 1.58,
"learning_rate": 1.576461867178302e-05,
"loss": 0.005,
"step": 13450
},
{
"epoch": 1.59,
"learning_rate": 1.5731914708062616e-05,
"loss": 0.0014,
"step": 13475
},
{
"epoch": 1.59,
"learning_rate": 1.5699210744342214e-05,
"loss": 0.0087,
"step": 13500
},
{
"epoch": 1.59,
"learning_rate": 1.566650678062181e-05,
"loss": 0.0326,
"step": 13525
},
{
"epoch": 1.6,
"learning_rate": 1.563380281690141e-05,
"loss": 0.0045,
"step": 13550
},
{
"epoch": 1.6,
"learning_rate": 1.5601098853181004e-05,
"loss": 0.0382,
"step": 13575
},
{
"epoch": 1.6,
"learning_rate": 1.5568394889460602e-05,
"loss": 0.0126,
"step": 13600
},
{
"epoch": 1.6,
"learning_rate": 1.5535690925740198e-05,
"loss": 0.0213,
"step": 13625
},
{
"epoch": 1.61,
"learning_rate": 1.5502986962019796e-05,
"loss": 0.0171,
"step": 13650
},
{
"epoch": 1.61,
"learning_rate": 1.5470282998299392e-05,
"loss": 0.0131,
"step": 13675
},
{
"epoch": 1.61,
"learning_rate": 1.543757903457899e-05,
"loss": 0.0176,
"step": 13700
},
{
"epoch": 1.62,
"learning_rate": 1.5404875070858586e-05,
"loss": 0.0021,
"step": 13725
},
{
"epoch": 1.62,
"learning_rate": 1.5372171107138184e-05,
"loss": 0.0328,
"step": 13750
},
{
"epoch": 1.62,
"learning_rate": 1.533946714341778e-05,
"loss": 0.0079,
"step": 13775
},
{
"epoch": 1.62,
"learning_rate": 1.530676317969738e-05,
"loss": 0.0077,
"step": 13800
},
{
"epoch": 1.63,
"learning_rate": 1.5274059215976977e-05,
"loss": 0.015,
"step": 13825
},
{
"epoch": 1.63,
"learning_rate": 1.5241355252256572e-05,
"loss": 0.0148,
"step": 13850
},
{
"epoch": 1.63,
"learning_rate": 1.520865128853617e-05,
"loss": 0.0002,
"step": 13875
},
{
"epoch": 1.64,
"learning_rate": 1.5175947324815766e-05,
"loss": 0.0042,
"step": 13900
},
{
"epoch": 1.64,
"learning_rate": 1.5143243361095363e-05,
"loss": 0.0132,
"step": 13925
},
{
"epoch": 1.64,
"learning_rate": 1.511053939737496e-05,
"loss": 0.0152,
"step": 13950
},
{
"epoch": 1.65,
"learning_rate": 1.507783543365456e-05,
"loss": 0.0003,
"step": 13975
},
{
"epoch": 1.65,
"learning_rate": 1.5045131469934158e-05,
"loss": 0.0216,
"step": 14000
},
{
"epoch": 1.65,
"learning_rate": 1.5012427506213755e-05,
"loss": 0.0176,
"step": 14025
},
{
"epoch": 1.65,
"learning_rate": 1.497972354249335e-05,
"loss": 0.0119,
"step": 14050
},
{
"epoch": 1.66,
"learning_rate": 1.4947019578772947e-05,
"loss": 0.0341,
"step": 14075
},
{
"epoch": 1.66,
"learning_rate": 1.4914315615052544e-05,
"loss": 0.006,
"step": 14100
},
{
"epoch": 1.66,
"learning_rate": 1.4881611651332141e-05,
"loss": 0.0066,
"step": 14125
},
{
"epoch": 1.67,
"learning_rate": 1.4850215846160556e-05,
"loss": 0.0087,
"step": 14150
},
{
"epoch": 1.67,
"learning_rate": 1.4817511882440153e-05,
"loss": 0.0007,
"step": 14175
},
{
"epoch": 1.67,
"learning_rate": 1.478480791871975e-05,
"loss": 0.0198,
"step": 14200
},
{
"epoch": 1.67,
"learning_rate": 1.4752103954999347e-05,
"loss": 0.0209,
"step": 14225
},
{
"epoch": 1.68,
"learning_rate": 1.4719399991278944e-05,
"loss": 0.0134,
"step": 14250
},
{
"epoch": 1.68,
"learning_rate": 1.468669602755854e-05,
"loss": 0.0085,
"step": 14275
},
{
"epoch": 1.68,
"learning_rate": 1.4653992063838138e-05,
"loss": 0.0295,
"step": 14300
},
{
"epoch": 1.69,
"learning_rate": 1.4621288100117735e-05,
"loss": 0.0116,
"step": 14325
},
{
"epoch": 1.69,
"learning_rate": 1.4588584136397332e-05,
"loss": 0.0157,
"step": 14350
},
{
"epoch": 1.69,
"learning_rate": 1.4555880172676929e-05,
"loss": 0.0091,
"step": 14375
},
{
"epoch": 1.7,
"learning_rate": 1.4523176208956526e-05,
"loss": 0.0323,
"step": 14400
},
{
"epoch": 1.7,
"learning_rate": 1.4490472245236123e-05,
"loss": 0.0064,
"step": 14425
},
{
"epoch": 1.7,
"learning_rate": 1.4459076440064536e-05,
"loss": 0.0137,
"step": 14450
},
{
"epoch": 1.7,
"learning_rate": 1.4426372476344133e-05,
"loss": 0.0182,
"step": 14475
},
{
"epoch": 1.71,
"learning_rate": 1.439366851262373e-05,
"loss": 0.0312,
"step": 14500
},
{
"epoch": 1.71,
"learning_rate": 1.4360964548903327e-05,
"loss": 0.0171,
"step": 14525
},
{
"epoch": 1.71,
"learning_rate": 1.4328260585182925e-05,
"loss": 0.0003,
"step": 14550
},
{
"epoch": 1.72,
"learning_rate": 1.4295556621462522e-05,
"loss": 0.0066,
"step": 14575
},
{
"epoch": 1.72,
"learning_rate": 1.426285265774212e-05,
"loss": 0.0058,
"step": 14600
},
{
"epoch": 1.72,
"learning_rate": 1.4230148694021716e-05,
"loss": 0.0216,
"step": 14625
},
{
"epoch": 1.72,
"learning_rate": 1.4197444730301313e-05,
"loss": 0.0181,
"step": 14650
},
{
"epoch": 1.73,
"learning_rate": 1.416474076658091e-05,
"loss": 0.0214,
"step": 14675
},
{
"epoch": 1.73,
"learning_rate": 1.4132036802860507e-05,
"loss": 0.008,
"step": 14700
},
{
"epoch": 1.73,
"learning_rate": 1.4099332839140104e-05,
"loss": 0.0163,
"step": 14725
},
{
"epoch": 1.74,
"learning_rate": 1.4066628875419701e-05,
"loss": 0.0078,
"step": 14750
},
{
"epoch": 1.74,
"learning_rate": 1.4033924911699298e-05,
"loss": 0.0243,
"step": 14775
},
{
"epoch": 1.74,
"learning_rate": 1.4001220947978895e-05,
"loss": 0.012,
"step": 14800
},
{
"epoch": 1.75,
"learning_rate": 1.3968516984258492e-05,
"loss": 0.0034,
"step": 14825
},
{
"epoch": 1.75,
"learning_rate": 1.393581302053809e-05,
"loss": 0.0073,
"step": 14850
},
{
"epoch": 1.75,
"learning_rate": 1.3903109056817686e-05,
"loss": 0.023,
"step": 14875
},
{
"epoch": 1.75,
"learning_rate": 1.3870405093097283e-05,
"loss": 0.0123,
"step": 14900
},
{
"epoch": 1.76,
"learning_rate": 1.383770112937688e-05,
"loss": 0.0012,
"step": 14925
},
{
"epoch": 1.76,
"learning_rate": 1.3806305324205293e-05,
"loss": 0.0245,
"step": 14950
},
{
"epoch": 1.76,
"learning_rate": 1.377360136048489e-05,
"loss": 0.0165,
"step": 14975
},
{
"epoch": 1.77,
"learning_rate": 1.3740897396764487e-05,
"loss": 0.0203,
"step": 15000
},
{
"epoch": 1.77,
"learning_rate": 1.3708193433044084e-05,
"loss": 0.0018,
"step": 15025
},
{
"epoch": 1.77,
"learning_rate": 1.3675489469323681e-05,
"loss": 0.0045,
"step": 15050
},
{
"epoch": 1.77,
"learning_rate": 1.3642785505603278e-05,
"loss": 0.0046,
"step": 15075
},
{
"epoch": 1.78,
"learning_rate": 1.3610081541882875e-05,
"loss": 0.0014,
"step": 15100
},
{
"epoch": 1.78,
"learning_rate": 1.3577377578162472e-05,
"loss": 0.0092,
"step": 15125
},
{
"epoch": 1.78,
"learning_rate": 1.3544673614442071e-05,
"loss": 0.0134,
"step": 15150
},
{
"epoch": 1.79,
"learning_rate": 1.3511969650721668e-05,
"loss": 0.0032,
"step": 15175
},
{
"epoch": 1.79,
"learning_rate": 1.3479265687001265e-05,
"loss": 0.0095,
"step": 15200
},
{
"epoch": 1.79,
"learning_rate": 1.3446561723280862e-05,
"loss": 0.021,
"step": 15225
},
{
"epoch": 1.8,
"learning_rate": 1.3413857759560459e-05,
"loss": 0.0293,
"step": 15250
},
{
"epoch": 1.8,
"learning_rate": 1.3381153795840056e-05,
"loss": 0.0122,
"step": 15275
},
{
"epoch": 1.8,
"learning_rate": 1.3348449832119653e-05,
"loss": 0.0027,
"step": 15300
},
{
"epoch": 1.8,
"learning_rate": 1.3315745868399252e-05,
"loss": 0.0054,
"step": 15325
},
{
"epoch": 1.81,
"learning_rate": 1.3283041904678849e-05,
"loss": 0.0292,
"step": 15350
},
{
"epoch": 1.81,
"learning_rate": 1.3250337940958446e-05,
"loss": 0.0371,
"step": 15375
},
{
"epoch": 1.81,
"learning_rate": 1.3217633977238043e-05,
"loss": 0.0022,
"step": 15400
},
{
"epoch": 1.82,
"learning_rate": 1.318493001351764e-05,
"loss": 0.0084,
"step": 15425
},
{
"epoch": 1.82,
"learning_rate": 1.3152226049797237e-05,
"loss": 0.0054,
"step": 15450
},
{
"epoch": 1.82,
"learning_rate": 1.3119522086076834e-05,
"loss": 0.0002,
"step": 15475
},
{
"epoch": 1.82,
"learning_rate": 1.308681812235643e-05,
"loss": 0.008,
"step": 15500
},
{
"epoch": 1.83,
"learning_rate": 1.3054114158636028e-05,
"loss": 0.0052,
"step": 15525
},
{
"epoch": 1.83,
"learning_rate": 1.3021410194915625e-05,
"loss": 0.0003,
"step": 15550
},
{
"epoch": 1.83,
"learning_rate": 1.2988706231195222e-05,
"loss": 0.0205,
"step": 15575
},
{
"epoch": 1.84,
"learning_rate": 1.2956002267474819e-05,
"loss": 0.0004,
"step": 15600
},
{
"epoch": 1.84,
"learning_rate": 1.2923298303754416e-05,
"loss": 0.0105,
"step": 15625
},
{
"epoch": 1.84,
"learning_rate": 1.2890594340034013e-05,
"loss": 0.0177,
"step": 15650
},
{
"epoch": 1.85,
"learning_rate": 1.285789037631361e-05,
"loss": 0.0185,
"step": 15675
},
{
"epoch": 1.85,
"learning_rate": 1.2825186412593207e-05,
"loss": 0.0211,
"step": 15700
},
{
"epoch": 1.85,
"learning_rate": 1.2792482448872804e-05,
"loss": 0.0073,
"step": 15725
},
{
"epoch": 1.85,
"learning_rate": 1.27597784851524e-05,
"loss": 0.0334,
"step": 15750
},
{
"epoch": 1.86,
"learning_rate": 1.2727074521431998e-05,
"loss": 0.0338,
"step": 15775
},
{
"epoch": 1.86,
"learning_rate": 1.2694370557711595e-05,
"loss": 0.0243,
"step": 15800
},
{
"epoch": 1.86,
"learning_rate": 1.2661666593991192e-05,
"loss": 0.0215,
"step": 15825
},
{
"epoch": 1.87,
"learning_rate": 1.2628962630270789e-05,
"loss": 0.0318,
"step": 15850
},
{
"epoch": 1.87,
"learning_rate": 1.2596258666550386e-05,
"loss": 0.0148,
"step": 15875
},
{
"epoch": 1.87,
"learning_rate": 1.2563554702829983e-05,
"loss": 0.0194,
"step": 15900
},
{
"epoch": 1.87,
"learning_rate": 1.253085073910958e-05,
"loss": 0.0004,
"step": 15925
},
{
"epoch": 1.88,
"learning_rate": 1.2498146775389177e-05,
"loss": 0.0031,
"step": 15950
},
{
"epoch": 1.88,
"learning_rate": 1.2465442811668774e-05,
"loss": 0.0195,
"step": 15975
},
{
"epoch": 1.88,
"learning_rate": 1.243273884794837e-05,
"loss": 0.0005,
"step": 16000
},
{
"epoch": 1.89,
"learning_rate": 1.2400034884227968e-05,
"loss": 0.0046,
"step": 16025
},
{
"epoch": 1.89,
"learning_rate": 1.2367330920507565e-05,
"loss": 0.0007,
"step": 16050
},
{
"epoch": 1.89,
"learning_rate": 1.2334626956787162e-05,
"loss": 0.0228,
"step": 16075
},
{
"epoch": 1.9,
"learning_rate": 1.2301922993066759e-05,
"loss": 0.0037,
"step": 16100
},
{
"epoch": 1.9,
"learning_rate": 1.2269219029346356e-05,
"loss": 0.0085,
"step": 16125
},
{
"epoch": 1.9,
"learning_rate": 1.2236515065625953e-05,
"loss": 0.0112,
"step": 16150
},
{
"epoch": 1.9,
"learning_rate": 1.2203811101905551e-05,
"loss": 0.0001,
"step": 16175
},
{
"epoch": 1.91,
"learning_rate": 1.2171107138185148e-05,
"loss": 0.0032,
"step": 16200
},
{
"epoch": 1.91,
"learning_rate": 1.2138403174464745e-05,
"loss": 0.0058,
"step": 16225
},
{
"epoch": 1.91,
"learning_rate": 1.2105699210744342e-05,
"loss": 0.0061,
"step": 16250
},
{
"epoch": 1.92,
"learning_rate": 1.207299524702394e-05,
"loss": 0.0071,
"step": 16275
},
{
"epoch": 1.92,
"learning_rate": 1.2040291283303538e-05,
"loss": 0.0127,
"step": 16300
},
{
"epoch": 1.92,
"learning_rate": 1.2007587319583135e-05,
"loss": 0.0081,
"step": 16325
},
{
"epoch": 1.92,
"learning_rate": 1.1974883355862732e-05,
"loss": 0.0148,
"step": 16350
},
{
"epoch": 1.93,
"learning_rate": 1.1942179392142329e-05,
"loss": 0.0253,
"step": 16375
},
{
"epoch": 1.93,
"learning_rate": 1.1909475428421926e-05,
"loss": 0.022,
"step": 16400
},
{
"epoch": 1.93,
"learning_rate": 1.1876771464701523e-05,
"loss": 0.0004,
"step": 16425
},
{
"epoch": 1.94,
"learning_rate": 1.184406750098112e-05,
"loss": 0.0067,
"step": 16450
},
{
"epoch": 1.94,
"learning_rate": 1.1811363537260717e-05,
"loss": 0.0001,
"step": 16475
},
{
"epoch": 1.94,
"learning_rate": 1.1778659573540314e-05,
"loss": 0.0038,
"step": 16500
},
{
"epoch": 1.95,
"learning_rate": 1.1745955609819911e-05,
"loss": 0.0187,
"step": 16525
},
{
"epoch": 1.95,
"learning_rate": 1.1713251646099508e-05,
"loss": 0.0026,
"step": 16550
},
{
"epoch": 1.95,
"learning_rate": 1.1680547682379105e-05,
"loss": 0.0124,
"step": 16575
},
{
"epoch": 1.95,
"learning_rate": 1.1647843718658702e-05,
"loss": 0.0203,
"step": 16600
},
{
"epoch": 1.96,
"learning_rate": 1.1615139754938299e-05,
"loss": 0.0119,
"step": 16625
},
{
"epoch": 1.96,
"learning_rate": 1.1582435791217896e-05,
"loss": 0.0185,
"step": 16650
},
{
"epoch": 1.96,
"learning_rate": 1.1549731827497493e-05,
"loss": 0.0058,
"step": 16675
},
{
"epoch": 1.97,
"learning_rate": 1.151702786377709e-05,
"loss": 0.0151,
"step": 16700
},
{
"epoch": 1.97,
"learning_rate": 1.1484323900056687e-05,
"loss": 0.0177,
"step": 16725
},
{
"epoch": 1.97,
"learning_rate": 1.1451619936336284e-05,
"loss": 0.0066,
"step": 16750
},
{
"epoch": 1.97,
"learning_rate": 1.1418915972615881e-05,
"loss": 0.0013,
"step": 16775
},
{
"epoch": 1.98,
"learning_rate": 1.1386212008895478e-05,
"loss": 0.0154,
"step": 16800
},
{
"epoch": 1.98,
"learning_rate": 1.1353508045175075e-05,
"loss": 0.0161,
"step": 16825
},
{
"epoch": 1.98,
"learning_rate": 1.1320804081454672e-05,
"loss": 0.0123,
"step": 16850
},
{
"epoch": 1.99,
"learning_rate": 1.1288100117734269e-05,
"loss": 0.0215,
"step": 16875
},
{
"epoch": 1.99,
"learning_rate": 1.1255396154013866e-05,
"loss": 0.0025,
"step": 16900
},
{
"epoch": 1.99,
"learning_rate": 1.1222692190293463e-05,
"loss": 0.0094,
"step": 16925
},
{
"epoch": 2.0,
"learning_rate": 1.118998822657306e-05,
"loss": 0.0151,
"step": 16950
},
{
"epoch": 2.0,
"learning_rate": 1.1157284262852657e-05,
"loss": 0.0003,
"step": 16975
},
{
"epoch": 2.0,
"eval_accuracy": 0.8459595959595959,
"eval_loss": 1.005859375,
"eval_runtime": 4369.1863,
"eval_samples_per_second": 7.885,
"eval_steps_per_second": 0.246,
"step": 16988
},
{
"epoch": 2.0,
"learning_rate": 1.1124580299132256e-05,
"loss": 0.0051,
"step": 17000
},
{
"epoch": 2.0,
"learning_rate": 1.1091876335411853e-05,
"loss": 0.0036,
"step": 17025
},
{
"epoch": 2.01,
"learning_rate": 1.105917237169145e-05,
"loss": 0.0029,
"step": 17050
},
{
"epoch": 2.01,
"learning_rate": 1.1026468407971047e-05,
"loss": 0.0001,
"step": 17075
},
{
"epoch": 2.01,
"learning_rate": 1.0993764444250644e-05,
"loss": 0.0001,
"step": 17100
},
{
"epoch": 2.02,
"learning_rate": 1.096106048053024e-05,
"loss": 0.0001,
"step": 17125
},
{
"epoch": 2.02,
"learning_rate": 1.0928356516809838e-05,
"loss": 0.0289,
"step": 17150
},
{
"epoch": 2.02,
"learning_rate": 1.0895652553089435e-05,
"loss": 0.0276,
"step": 17175
},
{
"epoch": 2.02,
"learning_rate": 1.0862948589369032e-05,
"loss": 0.0061,
"step": 17200
},
{
"epoch": 2.03,
"learning_rate": 1.0830244625648629e-05,
"loss": 0.0002,
"step": 17225
},
{
"epoch": 2.03,
"learning_rate": 1.0797540661928226e-05,
"loss": 0.0001,
"step": 17250
},
{
"epoch": 2.03,
"learning_rate": 1.0764836698207823e-05,
"loss": 0.0002,
"step": 17275
},
{
"epoch": 2.04,
"learning_rate": 1.073213273448742e-05,
"loss": 0.0003,
"step": 17300
},
{
"epoch": 2.04,
"learning_rate": 1.0700736929315834e-05,
"loss": 0.0153,
"step": 17325
},
{
"epoch": 2.04,
"learning_rate": 1.0668032965595431e-05,
"loss": 0.0268,
"step": 17350
},
{
"epoch": 2.05,
"learning_rate": 1.0635329001875028e-05,
"loss": 0.0122,
"step": 17375
},
{
"epoch": 2.05,
"learning_rate": 1.0602625038154625e-05,
"loss": 0.0133,
"step": 17400
},
{
"epoch": 2.05,
"learning_rate": 1.0569921074434222e-05,
"loss": 0.0006,
"step": 17425
},
{
"epoch": 2.05,
"learning_rate": 1.053721711071382e-05,
"loss": 0.0003,
"step": 17450
},
{
"epoch": 2.06,
"learning_rate": 1.0504513146993416e-05,
"loss": 0.0003,
"step": 17475
},
{
"epoch": 2.06,
"learning_rate": 1.0471809183273013e-05,
"loss": 0.0047,
"step": 17500
},
{
"epoch": 2.06,
"learning_rate": 1.043910521955261e-05,
"loss": 0.0091,
"step": 17525
},
{
"epoch": 2.07,
"learning_rate": 1.0406401255832207e-05,
"loss": 0.0295,
"step": 17550
},
{
"epoch": 2.07,
"learning_rate": 1.0373697292111804e-05,
"loss": 0.0008,
"step": 17575
},
{
"epoch": 2.07,
"learning_rate": 1.0342301486940217e-05,
"loss": 0.0166,
"step": 17600
},
{
"epoch": 2.07,
"learning_rate": 1.0309597523219816e-05,
"loss": 0.0091,
"step": 17625
},
{
"epoch": 2.08,
"learning_rate": 1.0276893559499413e-05,
"loss": 0.0002,
"step": 17650
},
{
"epoch": 2.08,
"learning_rate": 1.024418959577901e-05,
"loss": 0.0001,
"step": 17675
},
{
"epoch": 2.08,
"learning_rate": 1.0211485632058607e-05,
"loss": 0.0189,
"step": 17700
},
{
"epoch": 2.09,
"learning_rate": 1.0178781668338204e-05,
"loss": 0.0006,
"step": 17725
},
{
"epoch": 2.09,
"learning_rate": 1.0146077704617801e-05,
"loss": 0.0041,
"step": 17750
},
{
"epoch": 2.09,
"learning_rate": 1.0113373740897398e-05,
"loss": 0.0005,
"step": 17775
},
{
"epoch": 2.1,
"learning_rate": 1.0080669777176995e-05,
"loss": 0.0101,
"step": 17800
},
{
"epoch": 2.1,
"learning_rate": 1.0047965813456592e-05,
"loss": 0.0001,
"step": 17825
},
{
"epoch": 2.1,
"learning_rate": 1.0015261849736189e-05,
"loss": 0.0003,
"step": 17850
},
{
"epoch": 2.1,
"learning_rate": 9.982557886015786e-06,
"loss": 0.0117,
"step": 17875
},
{
"epoch": 2.11,
"learning_rate": 9.949853922295383e-06,
"loss": 0.0069,
"step": 17900
},
{
"epoch": 2.11,
"learning_rate": 9.91714995857498e-06,
"loss": 0.0036,
"step": 17925
},
{
"epoch": 2.11,
"learning_rate": 9.884445994854577e-06,
"loss": 0.0296,
"step": 17950
},
{
"epoch": 2.12,
"learning_rate": 9.851742031134174e-06,
"loss": 0.0003,
"step": 17975
},
{
"epoch": 2.12,
"learning_rate": 9.819038067413771e-06,
"loss": 0.0138,
"step": 18000
},
{
"epoch": 2.12,
"learning_rate": 9.786334103693368e-06,
"loss": 0.0067,
"step": 18025
},
{
"epoch": 2.13,
"learning_rate": 9.753630139972965e-06,
"loss": 0.0021,
"step": 18050
},
{
"epoch": 2.13,
"learning_rate": 9.720926176252562e-06,
"loss": 0.0092,
"step": 18075
},
{
"epoch": 2.13,
"learning_rate": 9.688222212532159e-06,
"loss": 0.0001,
"step": 18100
},
{
"epoch": 2.13,
"learning_rate": 9.655518248811756e-06,
"loss": 0.0022,
"step": 18125
},
{
"epoch": 2.14,
"learning_rate": 9.622814285091353e-06,
"loss": 0.0067,
"step": 18150
},
{
"epoch": 2.14,
"learning_rate": 9.59011032137095e-06,
"loss": 0.0004,
"step": 18175
},
{
"epoch": 2.14,
"learning_rate": 9.557406357650547e-06,
"loss": 0.0004,
"step": 18200
},
{
"epoch": 2.15,
"learning_rate": 9.524702393930144e-06,
"loss": 0.0073,
"step": 18225
},
{
"epoch": 2.15,
"learning_rate": 9.491998430209741e-06,
"loss": 0.0001,
"step": 18250
},
{
"epoch": 2.15,
"learning_rate": 9.459294466489338e-06,
"loss": 0.0001,
"step": 18275
},
{
"epoch": 2.15,
"learning_rate": 9.426590502768935e-06,
"loss": 0.0004,
"step": 18300
},
{
"epoch": 2.16,
"learning_rate": 9.393886539048532e-06,
"loss": 0.0001,
"step": 18325
},
{
"epoch": 2.16,
"learning_rate": 9.361182575328129e-06,
"loss": 0.0001,
"step": 18350
},
{
"epoch": 2.16,
"learning_rate": 9.328478611607726e-06,
"loss": 0.007,
"step": 18375
},
{
"epoch": 2.17,
"learning_rate": 9.295774647887323e-06,
"loss": 0.0001,
"step": 18400
},
{
"epoch": 2.17,
"learning_rate": 9.26307068416692e-06,
"loss": 0.0001,
"step": 18425
},
{
"epoch": 2.17,
"learning_rate": 9.230366720446517e-06,
"loss": 0.0076,
"step": 18450
},
{
"epoch": 2.18,
"learning_rate": 9.197662756726116e-06,
"loss": 0.0001,
"step": 18475
},
{
"epoch": 2.18,
"learning_rate": 9.164958793005713e-06,
"loss": 0.0079,
"step": 18500
},
{
"epoch": 2.18,
"learning_rate": 9.13225482928531e-06,
"loss": 0.007,
"step": 18525
},
{
"epoch": 2.18,
"learning_rate": 9.099550865564907e-06,
"loss": 0.011,
"step": 18550
},
{
"epoch": 2.19,
"learning_rate": 9.066846901844504e-06,
"loss": 0.0001,
"step": 18575
},
{
"epoch": 2.19,
"learning_rate": 9.0341429381241e-06,
"loss": 0.0137,
"step": 18600
},
{
"epoch": 2.19,
"learning_rate": 9.001438974403698e-06,
"loss": 0.0001,
"step": 18625
},
{
"epoch": 2.2,
"learning_rate": 8.968735010683296e-06,
"loss": 0.009,
"step": 18650
},
{
"epoch": 2.2,
"learning_rate": 8.936031046962893e-06,
"loss": 0.0044,
"step": 18675
},
{
"epoch": 2.2,
"learning_rate": 8.90332708324249e-06,
"loss": 0.0022,
"step": 18700
},
{
"epoch": 2.2,
"learning_rate": 8.870623119522087e-06,
"loss": 0.0101,
"step": 18725
},
{
"epoch": 2.21,
"learning_rate": 8.837919155801684e-06,
"loss": 0.0001,
"step": 18750
},
{
"epoch": 2.21,
"learning_rate": 8.805215192081281e-06,
"loss": 0.0009,
"step": 18775
},
{
"epoch": 2.21,
"learning_rate": 8.772511228360878e-06,
"loss": 0.0024,
"step": 18800
},
{
"epoch": 2.22,
"learning_rate": 8.739807264640475e-06,
"loss": 0.0001,
"step": 18825
},
{
"epoch": 2.22,
"learning_rate": 8.707103300920072e-06,
"loss": 0.0065,
"step": 18850
},
{
"epoch": 2.22,
"learning_rate": 8.67439933719967e-06,
"loss": 0.0003,
"step": 18875
},
{
"epoch": 2.23,
"learning_rate": 8.641695373479266e-06,
"loss": 0.0129,
"step": 18900
},
{
"epoch": 2.23,
"learning_rate": 8.608991409758863e-06,
"loss": 0.0151,
"step": 18925
},
{
"epoch": 2.23,
"learning_rate": 8.57628744603846e-06,
"loss": 0.0005,
"step": 18950
},
{
"epoch": 2.23,
"learning_rate": 8.543583482318057e-06,
"loss": 0.0062,
"step": 18975
},
{
"epoch": 2.24,
"learning_rate": 8.510879518597654e-06,
"loss": 0.0082,
"step": 19000
},
{
"epoch": 2.24,
"learning_rate": 8.478175554877251e-06,
"loss": 0.0001,
"step": 19025
},
{
"epoch": 2.24,
"learning_rate": 8.445471591156848e-06,
"loss": 0.0076,
"step": 19050
},
{
"epoch": 2.25,
"learning_rate": 8.412767627436445e-06,
"loss": 0.0001,
"step": 19075
},
{
"epoch": 2.25,
"learning_rate": 8.380063663716042e-06,
"loss": 0.0064,
"step": 19100
},
{
"epoch": 2.25,
"learning_rate": 8.34735969999564e-06,
"loss": 0.0077,
"step": 19125
},
{
"epoch": 2.25,
"learning_rate": 8.314655736275236e-06,
"loss": 0.0026,
"step": 19150
},
{
"epoch": 2.26,
"learning_rate": 8.281951772554833e-06,
"loss": 0.0038,
"step": 19175
},
{
"epoch": 2.26,
"learning_rate": 8.24924780883443e-06,
"loss": 0.0201,
"step": 19200
},
{
"epoch": 2.26,
"learning_rate": 8.216543845114027e-06,
"loss": 0.0055,
"step": 19225
},
{
"epoch": 2.27,
"learning_rate": 8.183839881393624e-06,
"loss": 0.0035,
"step": 19250
},
{
"epoch": 2.27,
"learning_rate": 8.151135917673221e-06,
"loss": 0.0058,
"step": 19275
},
{
"epoch": 2.27,
"learning_rate": 8.11843195395282e-06,
"loss": 0.0052,
"step": 19300
},
{
"epoch": 2.28,
"learning_rate": 8.085727990232417e-06,
"loss": 0.0027,
"step": 19325
},
{
"epoch": 2.28,
"learning_rate": 8.053024026512014e-06,
"loss": 0.0004,
"step": 19350
},
{
"epoch": 2.28,
"learning_rate": 8.020320062791611e-06,
"loss": 0.0076,
"step": 19375
},
{
"epoch": 2.28,
"learning_rate": 7.987616099071208e-06,
"loss": 0.008,
"step": 19400
},
{
"epoch": 2.29,
"learning_rate": 7.954912135350805e-06,
"loss": 0.0239,
"step": 19425
},
{
"epoch": 2.29,
"learning_rate": 7.922208171630402e-06,
"loss": 0.0126,
"step": 19450
},
{
"epoch": 2.29,
"learning_rate": 7.889504207909999e-06,
"loss": 0.0057,
"step": 19475
},
{
"epoch": 2.3,
"learning_rate": 7.856800244189596e-06,
"loss": 0.0002,
"step": 19500
},
{
"epoch": 2.3,
"learning_rate": 7.824096280469193e-06,
"loss": 0.0022,
"step": 19525
},
{
"epoch": 2.3,
"learning_rate": 7.79139231674879e-06,
"loss": 0.0107,
"step": 19550
},
{
"epoch": 2.3,
"learning_rate": 7.758688353028387e-06,
"loss": 0.0002,
"step": 19575
},
{
"epoch": 2.31,
"learning_rate": 7.728600706405616e-06,
"loss": 0.001,
"step": 19600
},
{
"epoch": 2.31,
"learning_rate": 7.695896742685213e-06,
"loss": 0.0003,
"step": 19625
},
{
"epoch": 2.31,
"learning_rate": 7.66319277896481e-06,
"loss": 0.0001,
"step": 19650
},
{
"epoch": 2.32,
"learning_rate": 7.630488815244407e-06,
"loss": 0.0001,
"step": 19675
},
{
"epoch": 2.32,
"learning_rate": 7.597784851524005e-06,
"loss": 0.0007,
"step": 19700
},
{
"epoch": 2.32,
"learning_rate": 7.565080887803602e-06,
"loss": 0.0039,
"step": 19725
},
{
"epoch": 2.33,
"learning_rate": 7.532376924083199e-06,
"loss": 0.0022,
"step": 19750
},
{
"epoch": 2.33,
"learning_rate": 7.499672960362797e-06,
"loss": 0.0045,
"step": 19775
},
{
"epoch": 2.33,
"learning_rate": 7.466968996642394e-06,
"loss": 0.0052,
"step": 19800
},
{
"epoch": 2.33,
"learning_rate": 7.434265032921991e-06,
"loss": 0.0022,
"step": 19825
},
{
"epoch": 2.34,
"learning_rate": 7.401561069201588e-06,
"loss": 0.0001,
"step": 19850
},
{
"epoch": 2.34,
"learning_rate": 7.368857105481185e-06,
"loss": 0.0042,
"step": 19875
},
{
"epoch": 2.34,
"learning_rate": 7.336153141760782e-06,
"loss": 0.0017,
"step": 19900
},
{
"epoch": 2.35,
"learning_rate": 7.303449178040379e-06,
"loss": 0.0162,
"step": 19925
},
{
"epoch": 2.35,
"learning_rate": 7.270745214319976e-06,
"loss": 0.0004,
"step": 19950
},
{
"epoch": 2.35,
"learning_rate": 7.238041250599573e-06,
"loss": 0.0001,
"step": 19975
},
{
"epoch": 2.35,
"learning_rate": 7.20533728687917e-06,
"loss": 0.0001,
"step": 20000
},
{
"epoch": 2.36,
"learning_rate": 7.172633323158767e-06,
"loss": 0.0001,
"step": 20025
},
{
"epoch": 2.36,
"learning_rate": 7.139929359438364e-06,
"loss": 0.0143,
"step": 20050
},
{
"epoch": 2.36,
"learning_rate": 7.107225395717961e-06,
"loss": 0.0069,
"step": 20075
},
{
"epoch": 2.37,
"learning_rate": 7.074521431997558e-06,
"loss": 0.0022,
"step": 20100
},
{
"epoch": 2.37,
"learning_rate": 7.0418174682771554e-06,
"loss": 0.0001,
"step": 20125
},
{
"epoch": 2.37,
"learning_rate": 7.0091135045567524e-06,
"loss": 0.0122,
"step": 20150
},
{
"epoch": 2.38,
"learning_rate": 6.9764095408363494e-06,
"loss": 0.0007,
"step": 20175
},
{
"epoch": 2.38,
"learning_rate": 6.943705577115947e-06,
"loss": 0.0001,
"step": 20200
},
{
"epoch": 2.38,
"learning_rate": 6.911001613395544e-06,
"loss": 0.0001,
"step": 20225
},
{
"epoch": 2.38,
"learning_rate": 6.878297649675141e-06,
"loss": 0.0001,
"step": 20250
},
{
"epoch": 2.39,
"learning_rate": 6.845593685954738e-06,
"loss": 0.0001,
"step": 20275
},
{
"epoch": 2.39,
"learning_rate": 6.812889722234335e-06,
"loss": 0.0053,
"step": 20300
},
{
"epoch": 2.39,
"learning_rate": 6.780185758513932e-06,
"loss": 0.0007,
"step": 20325
},
{
"epoch": 2.4,
"learning_rate": 6.747481794793529e-06,
"loss": 0.0089,
"step": 20350
},
{
"epoch": 2.4,
"learning_rate": 6.714777831073126e-06,
"loss": 0.0181,
"step": 20375
},
{
"epoch": 2.4,
"learning_rate": 6.682073867352723e-06,
"loss": 0.0231,
"step": 20400
},
{
"epoch": 2.4,
"learning_rate": 6.64936990363232e-06,
"loss": 0.0116,
"step": 20425
},
{
"epoch": 2.41,
"learning_rate": 6.616665939911917e-06,
"loss": 0.0151,
"step": 20450
},
{
"epoch": 2.41,
"learning_rate": 6.583961976191514e-06,
"loss": 0.0001,
"step": 20475
},
{
"epoch": 2.41,
"learning_rate": 6.551258012471111e-06,
"loss": 0.005,
"step": 20500
},
{
"epoch": 2.42,
"learning_rate": 6.518554048750708e-06,
"loss": 0.0032,
"step": 20525
},
{
"epoch": 2.42,
"learning_rate": 6.485850085030306e-06,
"loss": 0.0001,
"step": 20550
},
{
"epoch": 2.42,
"learning_rate": 6.453146121309903e-06,
"loss": 0.0133,
"step": 20575
},
{
"epoch": 2.43,
"learning_rate": 6.423058474687132e-06,
"loss": 0.0001,
"step": 20600
},
{
"epoch": 2.43,
"learning_rate": 6.390354510966729e-06,
"loss": 0.0106,
"step": 20625
},
{
"epoch": 2.43,
"learning_rate": 6.357650547246326e-06,
"loss": 0.0062,
"step": 20650
},
{
"epoch": 2.43,
"learning_rate": 6.324946583525923e-06,
"loss": 0.0001,
"step": 20675
},
{
"epoch": 2.44,
"learning_rate": 6.29224261980552e-06,
"loss": 0.008,
"step": 20700
},
{
"epoch": 2.44,
"learning_rate": 6.259538656085117e-06,
"loss": 0.0001,
"step": 20725
},
{
"epoch": 2.44,
"learning_rate": 6.226834692364715e-06,
"loss": 0.0005,
"step": 20750
},
{
"epoch": 2.45,
"learning_rate": 6.194130728644312e-06,
"loss": 0.0035,
"step": 20775
},
{
"epoch": 2.45,
"learning_rate": 6.161426764923909e-06,
"loss": 0.0001,
"step": 20800
},
{
"epoch": 2.45,
"learning_rate": 6.128722801203506e-06,
"loss": 0.0001,
"step": 20825
},
{
"epoch": 2.45,
"learning_rate": 6.096018837483103e-06,
"loss": 0.0087,
"step": 20850
},
{
"epoch": 2.46,
"learning_rate": 6.063314873762701e-06,
"loss": 0.0,
"step": 20875
},
{
"epoch": 2.46,
"learning_rate": 6.030610910042298e-06,
"loss": 0.0003,
"step": 20900
},
{
"epoch": 2.46,
"learning_rate": 5.997906946321895e-06,
"loss": 0.0004,
"step": 20925
},
{
"epoch": 2.47,
"learning_rate": 5.965202982601492e-06,
"loss": 0.0001,
"step": 20950
},
{
"epoch": 2.47,
"learning_rate": 5.932499018881089e-06,
"loss": 0.0001,
"step": 20975
},
{
"epoch": 2.47,
"learning_rate": 5.899795055160686e-06,
"loss": 0.0005,
"step": 21000
},
{
"epoch": 2.48,
"learning_rate": 5.867091091440283e-06,
"loss": 0.0001,
"step": 21025
},
{
"epoch": 2.48,
"learning_rate": 5.83438712771988e-06,
"loss": 0.0072,
"step": 21050
},
{
"epoch": 2.48,
"learning_rate": 5.801683163999477e-06,
"loss": 0.0001,
"step": 21075
},
{
"epoch": 2.48,
"learning_rate": 5.768979200279074e-06,
"loss": 0.0002,
"step": 21100
},
{
"epoch": 2.49,
"learning_rate": 5.736275236558671e-06,
"loss": 0.0001,
"step": 21125
},
{
"epoch": 2.49,
"learning_rate": 5.703571272838268e-06,
"loss": 0.0026,
"step": 21150
},
{
"epoch": 2.49,
"learning_rate": 5.670867309117866e-06,
"loss": 0.0009,
"step": 21175
},
{
"epoch": 2.5,
"learning_rate": 5.6381633453974626e-06,
"loss": 0.0102,
"step": 21200
},
{
"epoch": 2.5,
"learning_rate": 5.6054593816770596e-06,
"loss": 0.0096,
"step": 21225
},
{
"epoch": 2.5,
"learning_rate": 5.5727554179566566e-06,
"loss": 0.0001,
"step": 21250
},
{
"epoch": 2.5,
"learning_rate": 5.5400514542362536e-06,
"loss": 0.0124,
"step": 21275
},
{
"epoch": 2.51,
"learning_rate": 5.5073474905158506e-06,
"loss": 0.0049,
"step": 21300
},
{
"epoch": 2.51,
"learning_rate": 5.4746435267954476e-06,
"loss": 0.0001,
"step": 21325
},
{
"epoch": 2.51,
"learning_rate": 5.4419395630750446e-06,
"loss": 0.0062,
"step": 21350
},
{
"epoch": 2.52,
"learning_rate": 5.4092355993546416e-06,
"loss": 0.0,
"step": 21375
},
{
"epoch": 2.52,
"learning_rate": 5.3765316356342386e-06,
"loss": 0.0001,
"step": 21400
},
{
"epoch": 2.52,
"learning_rate": 5.3438276719138356e-06,
"loss": 0.0001,
"step": 21425
},
{
"epoch": 2.53,
"learning_rate": 5.311123708193433e-06,
"loss": 0.0008,
"step": 21450
},
{
"epoch": 2.53,
"learning_rate": 5.27841974447303e-06,
"loss": 0.0001,
"step": 21475
},
{
"epoch": 2.53,
"learning_rate": 5.245715780752627e-06,
"loss": 0.0017,
"step": 21500
},
{
"epoch": 2.53,
"learning_rate": 5.213011817032224e-06,
"loss": 0.0001,
"step": 21525
},
{
"epoch": 2.54,
"learning_rate": 5.180307853311821e-06,
"loss": 0.0,
"step": 21550
},
{
"epoch": 2.54,
"learning_rate": 5.147603889591418e-06,
"loss": 0.0001,
"step": 21575
},
{
"epoch": 2.54,
"learning_rate": 5.117516242968648e-06,
"loss": 0.0013,
"step": 21600
},
{
"epoch": 2.55,
"learning_rate": 5.084812279248245e-06,
"loss": 0.0001,
"step": 21625
},
{
"epoch": 2.55,
"learning_rate": 5.052108315527842e-06,
"loss": 0.0001,
"step": 21650
},
{
"epoch": 2.55,
"learning_rate": 5.019404351807439e-06,
"loss": 0.0,
"step": 21675
},
{
"epoch": 2.55,
"learning_rate": 4.986700388087036e-06,
"loss": 0.0,
"step": 21700
},
{
"epoch": 2.56,
"learning_rate": 4.953996424366633e-06,
"loss": 0.0,
"step": 21725
},
{
"epoch": 2.56,
"learning_rate": 4.92129246064623e-06,
"loss": 0.0,
"step": 21750
},
{
"epoch": 2.56,
"learning_rate": 4.888588496925828e-06,
"loss": 0.0,
"step": 21775
},
{
"epoch": 2.57,
"learning_rate": 4.855884533205425e-06,
"loss": 0.0,
"step": 21800
},
{
"epoch": 2.57,
"learning_rate": 4.823180569485022e-06,
"loss": 0.0,
"step": 21825
},
{
"epoch": 2.57,
"learning_rate": 4.790476605764619e-06,
"loss": 0.0,
"step": 21850
},
{
"epoch": 2.58,
"learning_rate": 4.757772642044216e-06,
"loss": 0.0001,
"step": 21875
},
{
"epoch": 2.58,
"learning_rate": 4.725068678323813e-06,
"loss": 0.0059,
"step": 21900
},
{
"epoch": 2.58,
"learning_rate": 4.69236471460341e-06,
"loss": 0.0,
"step": 21925
},
{
"epoch": 2.58,
"learning_rate": 4.659660750883007e-06,
"loss": 0.0001,
"step": 21950
},
{
"epoch": 2.59,
"learning_rate": 4.626956787162604e-06,
"loss": 0.0001,
"step": 21975
},
{
"epoch": 2.59,
"learning_rate": 4.594252823442201e-06,
"loss": 0.0001,
"step": 22000
},
{
"epoch": 2.59,
"learning_rate": 4.561548859721798e-06,
"loss": 0.0142,
"step": 22025
},
{
"epoch": 2.6,
"learning_rate": 4.528844896001395e-06,
"loss": 0.0073,
"step": 22050
},
{
"epoch": 2.6,
"learning_rate": 4.496140932280992e-06,
"loss": 0.0001,
"step": 22075
},
{
"epoch": 2.6,
"learning_rate": 4.463436968560589e-06,
"loss": 0.0,
"step": 22100
},
{
"epoch": 2.6,
"learning_rate": 4.430733004840187e-06,
"loss": 0.003,
"step": 22125
},
{
"epoch": 2.61,
"learning_rate": 4.398029041119784e-06,
"loss": 0.0007,
"step": 22150
},
{
"epoch": 2.61,
"learning_rate": 4.365325077399381e-06,
"loss": 0.0001,
"step": 22175
},
{
"epoch": 2.61,
"learning_rate": 4.332621113678979e-06,
"loss": 0.0001,
"step": 22200
},
{
"epoch": 2.62,
"learning_rate": 4.299917149958576e-06,
"loss": 0.0003,
"step": 22225
},
{
"epoch": 2.62,
"learning_rate": 4.267213186238173e-06,
"loss": 0.0,
"step": 22250
},
{
"epoch": 2.62,
"learning_rate": 4.23450922251777e-06,
"loss": 0.0,
"step": 22275
},
{
"epoch": 2.63,
"learning_rate": 4.201805258797367e-06,
"loss": 0.0001,
"step": 22300
},
{
"epoch": 2.63,
"learning_rate": 4.169101295076964e-06,
"loss": 0.0056,
"step": 22325
},
{
"epoch": 2.63,
"learning_rate": 4.136397331356561e-06,
"loss": 0.0,
"step": 22350
},
{
"epoch": 2.63,
"learning_rate": 4.103693367636158e-06,
"loss": 0.0,
"step": 22375
},
{
"epoch": 2.64,
"learning_rate": 4.070989403915755e-06,
"loss": 0.0001,
"step": 22400
},
{
"epoch": 2.64,
"learning_rate": 4.038285440195352e-06,
"loss": 0.015,
"step": 22425
},
{
"epoch": 2.64,
"learning_rate": 4.005581476474949e-06,
"loss": 0.0006,
"step": 22450
},
{
"epoch": 2.65,
"learning_rate": 3.972877512754546e-06,
"loss": 0.0041,
"step": 22475
},
{
"epoch": 2.65,
"learning_rate": 3.940173549034143e-06,
"loss": 0.0004,
"step": 22500
},
{
"epoch": 2.65,
"learning_rate": 3.90746958531374e-06,
"loss": 0.0001,
"step": 22525
},
{
"epoch": 2.65,
"learning_rate": 3.874765621593337e-06,
"loss": 0.0004,
"step": 22550
},
{
"epoch": 2.66,
"learning_rate": 3.842061657872934e-06,
"loss": 0.0001,
"step": 22575
},
{
"epoch": 2.66,
"learning_rate": 3.8119740112501635e-06,
"loss": 0.0,
"step": 22600
},
{
"epoch": 2.66,
"learning_rate": 3.7792700475297605e-06,
"loss": 0.0001,
"step": 22625
},
{
"epoch": 2.67,
"learning_rate": 3.7465660838093575e-06,
"loss": 0.0003,
"step": 22650
},
{
"epoch": 2.67,
"learning_rate": 3.7138621200889545e-06,
"loss": 0.0055,
"step": 22675
},
{
"epoch": 2.67,
"learning_rate": 3.6811581563685523e-06,
"loss": 0.0014,
"step": 22700
},
{
"epoch": 2.68,
"learning_rate": 3.6484541926481493e-06,
"loss": 0.0001,
"step": 22725
},
{
"epoch": 2.68,
"learning_rate": 3.6157502289277463e-06,
"loss": 0.0127,
"step": 22750
},
{
"epoch": 2.68,
"learning_rate": 3.5830462652073433e-06,
"loss": 0.0001,
"step": 22775
},
{
"epoch": 2.68,
"learning_rate": 3.5503423014869403e-06,
"loss": 0.0001,
"step": 22800
},
{
"epoch": 2.69,
"learning_rate": 3.5176383377665373e-06,
"loss": 0.0152,
"step": 22825
},
{
"epoch": 2.69,
"learning_rate": 3.4849343740461343e-06,
"loss": 0.0035,
"step": 22850
},
{
"epoch": 2.69,
"learning_rate": 3.4522304103257313e-06,
"loss": 0.0177,
"step": 22875
},
{
"epoch": 2.7,
"learning_rate": 3.4195264466053283e-06,
"loss": 0.0096,
"step": 22900
},
{
"epoch": 2.7,
"learning_rate": 3.3868224828849258e-06,
"loss": 0.0005,
"step": 22925
},
{
"epoch": 2.7,
"learning_rate": 3.3541185191645228e-06,
"loss": 0.0044,
"step": 22950
},
{
"epoch": 2.7,
"learning_rate": 3.32141455544412e-06,
"loss": 0.0066,
"step": 22975
},
{
"epoch": 2.71,
"learning_rate": 3.288710591723717e-06,
"loss": 0.0,
"step": 23000
},
{
"epoch": 2.71,
"learning_rate": 3.256006628003314e-06,
"loss": 0.0008,
"step": 23025
},
{
"epoch": 2.71,
"learning_rate": 3.223302664282911e-06,
"loss": 0.0,
"step": 23050
},
{
"epoch": 2.72,
"learning_rate": 3.190598700562508e-06,
"loss": 0.0002,
"step": 23075
},
{
"epoch": 2.72,
"learning_rate": 3.157894736842105e-06,
"loss": 0.0,
"step": 23100
},
{
"epoch": 2.72,
"learning_rate": 3.1251907731217026e-06,
"loss": 0.0001,
"step": 23125
},
{
"epoch": 2.73,
"learning_rate": 3.0924868094012996e-06,
"loss": 0.0162,
"step": 23150
},
{
"epoch": 2.73,
"learning_rate": 3.0597828456808966e-06,
"loss": 0.0133,
"step": 23175
},
{
"epoch": 2.73,
"learning_rate": 3.0270788819604936e-06,
"loss": 0.0098,
"step": 23200
},
{
"epoch": 2.73,
"learning_rate": 2.9943749182400906e-06,
"loss": 0.0002,
"step": 23225
},
{
"epoch": 2.74,
"learning_rate": 2.961670954519688e-06,
"loss": 0.0002,
"step": 23250
},
{
"epoch": 2.74,
"learning_rate": 2.928966990799285e-06,
"loss": 0.0004,
"step": 23275
},
{
"epoch": 2.74,
"learning_rate": 2.896263027078882e-06,
"loss": 0.0083,
"step": 23300
},
{
"epoch": 2.75,
"learning_rate": 2.863559063358479e-06,
"loss": 0.0035,
"step": 23325
},
{
"epoch": 2.75,
"learning_rate": 2.8308550996380764e-06,
"loss": 0.0137,
"step": 23350
},
{
"epoch": 2.75,
"learning_rate": 2.7981511359176734e-06,
"loss": 0.0084,
"step": 23375
},
{
"epoch": 2.75,
"learning_rate": 2.7654471721972704e-06,
"loss": 0.0,
"step": 23400
},
{
"epoch": 2.76,
"learning_rate": 2.7327432084768674e-06,
"loss": 0.0038,
"step": 23425
},
{
"epoch": 2.76,
"learning_rate": 2.7000392447564644e-06,
"loss": 0.0001,
"step": 23450
},
{
"epoch": 2.76,
"learning_rate": 2.6673352810360614e-06,
"loss": 0.0,
"step": 23475
},
{
"epoch": 2.77,
"learning_rate": 2.6346313173156584e-06,
"loss": 0.0004,
"step": 23500
},
{
"epoch": 2.77,
"learning_rate": 2.601927353595256e-06,
"loss": 0.0001,
"step": 23525
},
{
"epoch": 2.77,
"learning_rate": 2.5692233898748533e-06,
"loss": 0.0037,
"step": 23550
},
{
"epoch": 2.78,
"learning_rate": 2.5365194261544503e-06,
"loss": 0.0001,
"step": 23575
},
{
"epoch": 2.78,
"learning_rate": 2.5064317795316792e-06,
"loss": 0.0089,
"step": 23600
},
{
"epoch": 2.78,
"learning_rate": 2.4737278158112762e-06,
"loss": 0.0014,
"step": 23625
},
{
"epoch": 2.78,
"learning_rate": 2.4410238520908732e-06,
"loss": 0.0001,
"step": 23650
},
{
"epoch": 2.79,
"learning_rate": 2.4096280469192866e-06,
"loss": 0.0085,
"step": 23675
},
{
"epoch": 2.79,
"learning_rate": 2.3769240831988836e-06,
"loss": 0.0047,
"step": 23700
},
{
"epoch": 2.79,
"learning_rate": 2.344220119478481e-06,
"loss": 0.0001,
"step": 23725
},
{
"epoch": 2.8,
"learning_rate": 2.311516155758078e-06,
"loss": 0.0,
"step": 23750
},
{
"epoch": 2.8,
"learning_rate": 2.278812192037675e-06,
"loss": 0.0001,
"step": 23775
},
{
"epoch": 2.8,
"learning_rate": 2.246108228317272e-06,
"loss": 0.0001,
"step": 23800
},
{
"epoch": 2.8,
"learning_rate": 2.2134042645968695e-06,
"loss": 0.0057,
"step": 23825
},
{
"epoch": 2.81,
"learning_rate": 2.1807003008764665e-06,
"loss": 0.0,
"step": 23850
},
{
"epoch": 2.81,
"learning_rate": 2.1479963371560635e-06,
"loss": 0.0,
"step": 23875
},
{
"epoch": 2.81,
"learning_rate": 2.1152923734356605e-06,
"loss": 0.0,
"step": 23900
},
{
"epoch": 2.82,
"learning_rate": 2.0825884097152575e-06,
"loss": 0.0,
"step": 23925
},
{
"epoch": 2.82,
"learning_rate": 2.0498844459948545e-06,
"loss": 0.0,
"step": 23950
},
{
"epoch": 2.82,
"learning_rate": 2.0171804822744514e-06,
"loss": 0.0001,
"step": 23975
},
{
"epoch": 2.83,
"learning_rate": 1.9844765185540484e-06,
"loss": 0.0,
"step": 24000
},
{
"epoch": 2.83,
"learning_rate": 1.951772554833646e-06,
"loss": 0.0001,
"step": 24025
},
{
"epoch": 2.83,
"learning_rate": 1.9190685911132433e-06,
"loss": 0.008,
"step": 24050
},
{
"epoch": 2.83,
"learning_rate": 1.8863646273928403e-06,
"loss": 0.0001,
"step": 24075
},
{
"epoch": 2.84,
"learning_rate": 1.8536606636724373e-06,
"loss": 0.0,
"step": 24100
},
{
"epoch": 2.84,
"learning_rate": 1.8209566999520343e-06,
"loss": 0.0003,
"step": 24125
},
{
"epoch": 2.84,
"learning_rate": 1.7882527362316313e-06,
"loss": 0.0011,
"step": 24150
},
{
"epoch": 2.85,
"learning_rate": 1.7555487725112283e-06,
"loss": 0.0003,
"step": 24175
},
{
"epoch": 2.85,
"learning_rate": 1.7228448087908255e-06,
"loss": 0.0069,
"step": 24200
},
{
"epoch": 2.85,
"learning_rate": 1.6901408450704227e-06,
"loss": 0.0012,
"step": 24225
},
{
"epoch": 2.85,
"learning_rate": 1.6574368813500197e-06,
"loss": 0.0001,
"step": 24250
},
{
"epoch": 2.86,
"learning_rate": 1.6247329176296167e-06,
"loss": 0.0031,
"step": 24275
},
{
"epoch": 2.86,
"learning_rate": 1.592028953909214e-06,
"loss": 0.0001,
"step": 24300
},
{
"epoch": 2.86,
"learning_rate": 1.559324990188811e-06,
"loss": 0.0037,
"step": 24325
},
{
"epoch": 2.87,
"learning_rate": 1.526621026468408e-06,
"loss": 0.0136,
"step": 24350
},
{
"epoch": 2.87,
"learning_rate": 1.4939170627480051e-06,
"loss": 0.002,
"step": 24375
},
{
"epoch": 2.87,
"learning_rate": 1.4612130990276021e-06,
"loss": 0.0,
"step": 24400
},
{
"epoch": 2.88,
"learning_rate": 1.4285091353071993e-06,
"loss": 0.0001,
"step": 24425
},
{
"epoch": 2.88,
"learning_rate": 1.3958051715867963e-06,
"loss": 0.0001,
"step": 24450
},
{
"epoch": 2.88,
"learning_rate": 1.3631012078663933e-06,
"loss": 0.003,
"step": 24475
},
{
"epoch": 2.88,
"learning_rate": 1.3303972441459905e-06,
"loss": 0.0001,
"step": 24500
},
{
"epoch": 2.89,
"learning_rate": 1.2976932804255877e-06,
"loss": 0.0037,
"step": 24525
},
{
"epoch": 2.89,
"learning_rate": 1.2649893167051847e-06,
"loss": 0.0,
"step": 24550
},
{
"epoch": 2.89,
"learning_rate": 1.2322853529847817e-06,
"loss": 0.0037,
"step": 24575
},
{
"epoch": 2.9,
"learning_rate": 1.1995813892643787e-06,
"loss": 0.0,
"step": 24600
},
{
"epoch": 2.9,
"learning_rate": 1.166877425543976e-06,
"loss": 0.0082,
"step": 24625
},
{
"epoch": 2.9,
"learning_rate": 1.1341734618235732e-06,
"loss": 0.0,
"step": 24650
},
{
"epoch": 2.9,
"learning_rate": 1.1014694981031702e-06,
"loss": 0.0001,
"step": 24675
},
{
"epoch": 2.91,
"learning_rate": 1.0687655343827672e-06,
"loss": 0.0001,
"step": 24700
},
{
"epoch": 2.91,
"learning_rate": 1.0360615706623644e-06,
"loss": 0.0001,
"step": 24725
},
{
"epoch": 2.91,
"learning_rate": 1.0033576069419614e-06,
"loss": 0.0001,
"step": 24750
},
{
"epoch": 2.92,
"learning_rate": 9.706536432215586e-07,
"loss": 0.0,
"step": 24775
},
{
"epoch": 2.92,
"learning_rate": 9.379496795011555e-07,
"loss": 0.0,
"step": 24800
},
{
"epoch": 2.92,
"learning_rate": 9.052457157807527e-07,
"loss": 0.0074,
"step": 24825
},
{
"epoch": 2.93,
"learning_rate": 8.725417520603497e-07,
"loss": 0.0001,
"step": 24850
},
{
"epoch": 2.93,
"learning_rate": 8.398377883399468e-07,
"loss": 0.0,
"step": 24875
},
{
"epoch": 2.93,
"learning_rate": 8.071338246195439e-07,
"loss": 0.0001,
"step": 24900
},
{
"epoch": 2.93,
"learning_rate": 7.74429860899141e-07,
"loss": 0.0001,
"step": 24925
},
{
"epoch": 2.94,
"learning_rate": 7.417258971787381e-07,
"loss": 0.0036,
"step": 24950
},
{
"epoch": 2.94,
"learning_rate": 7.090219334583352e-07,
"loss": 0.0072,
"step": 24975
},
{
"epoch": 2.94,
"learning_rate": 6.763179697379322e-07,
"loss": 0.0022,
"step": 25000
},
{
"epoch": 2.95,
"learning_rate": 6.436140060175294e-07,
"loss": 0.0021,
"step": 25025
},
{
"epoch": 2.95,
"learning_rate": 6.109100422971264e-07,
"loss": 0.0084,
"step": 25050
},
{
"epoch": 2.95,
"learning_rate": 5.782060785767235e-07,
"loss": 0.0001,
"step": 25075
},
{
"epoch": 2.96,
"learning_rate": 5.455021148563206e-07,
"loss": 0.0,
"step": 25100
},
{
"epoch": 2.96,
"learning_rate": 5.127981511359177e-07,
"loss": 0.0,
"step": 25125
},
{
"epoch": 2.96,
"learning_rate": 4.800941874155147e-07,
"loss": 0.0097,
"step": 25150
},
{
"epoch": 2.96,
"learning_rate": 4.4739022369511187e-07,
"loss": 0.0001,
"step": 25175
},
{
"epoch": 2.97,
"learning_rate": 4.146862599747089e-07,
"loss": 0.0005,
"step": 25200
},
{
"epoch": 2.97,
"learning_rate": 3.81982296254306e-07,
"loss": 0.0057,
"step": 25225
},
{
"epoch": 2.97,
"learning_rate": 3.4927833253390313e-07,
"loss": 0.0001,
"step": 25250
},
{
"epoch": 2.98,
"learning_rate": 3.165743688135002e-07,
"loss": 0.0001,
"step": 25275
},
{
"epoch": 2.98,
"learning_rate": 2.838704050930973e-07,
"loss": 0.0003,
"step": 25300
},
{
"epoch": 2.98,
"learning_rate": 2.511664413726944e-07,
"loss": 0.0,
"step": 25325
},
{
"epoch": 2.98,
"learning_rate": 2.1846247765229147e-07,
"loss": 0.0001,
"step": 25350
},
{
"epoch": 2.99,
"learning_rate": 1.8575851393188857e-07,
"loss": 0.0001,
"step": 25375
},
{
"epoch": 2.99,
"learning_rate": 1.5305455021148562e-07,
"loss": 0.0001,
"step": 25400
},
{
"epoch": 2.99,
"learning_rate": 1.203505864910827e-07,
"loss": 0.0027,
"step": 25425
},
{
"epoch": 3.0,
"learning_rate": 8.76466227706798e-08,
"loss": 0.005,
"step": 25450
},
{
"epoch": 3.0,
"learning_rate": 5.4942659050276896e-08,
"loss": 0.0,
"step": 25475
},
{
"epoch": 3.0,
"eval_accuracy": 0.8805294322535702,
"eval_loss": 0.84130859375,
"eval_runtime": 4378.6622,
"eval_samples_per_second": 7.868,
"eval_steps_per_second": 0.246,
"step": 25482
},
{
"epoch": 3.0,
"step": 25482,
"total_flos": 3.59258808571394e+20,
"train_loss": 0.06268550049036697,
"train_runtime": 389325.9759,
"train_samples_per_second": 2.094,
"train_steps_per_second": 0.065
}
],
"max_steps": 25482,
"num_train_epochs": 3,
"total_flos": 3.59258808571394e+20,
"trial_name": null,
"trial_params": null
}