Mizuiro-sakura's picture
Upload 12 files
91b112b
raw
history blame
30.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 4881,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.00029877074370006146,
"loss": 2.9685,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 0.00029754148740012294,
"loss": 2.2769,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 0.00029631223110018436,
"loss": 2.1295,
"step": 60
},
{
"epoch": 0.05,
"learning_rate": 0.00029508297480024584,
"loss": 2.0832,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 0.00029385371850030727,
"loss": 2.0765,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 0.00029262446220036875,
"loss": 2.0831,
"step": 120
},
{
"epoch": 0.09,
"learning_rate": 0.00029139520590043023,
"loss": 2.1462,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 0.00029016594960049166,
"loss": 2.1126,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 0.00028893669330055314,
"loss": 2.0809,
"step": 180
},
{
"epoch": 0.12,
"learning_rate": 0.0002877074370006146,
"loss": 2.1573,
"step": 200
},
{
"epoch": 0.14,
"learning_rate": 0.00028647818070067605,
"loss": 2.1391,
"step": 220
},
{
"epoch": 0.15,
"learning_rate": 0.00028524892440073753,
"loss": 2.1265,
"step": 240
},
{
"epoch": 0.16,
"learning_rate": 0.00028401966810079896,
"loss": 2.0266,
"step": 260
},
{
"epoch": 0.17,
"learning_rate": 0.00028279041180086044,
"loss": 2.0723,
"step": 280
},
{
"epoch": 0.18,
"learning_rate": 0.0002815611555009219,
"loss": 2.1082,
"step": 300
},
{
"epoch": 0.2,
"learning_rate": 0.0002803318992009834,
"loss": 2.061,
"step": 320
},
{
"epoch": 0.21,
"learning_rate": 0.0002791026429010449,
"loss": 2.0605,
"step": 340
},
{
"epoch": 0.22,
"learning_rate": 0.0002778733866011063,
"loss": 2.0401,
"step": 360
},
{
"epoch": 0.23,
"learning_rate": 0.00027664413030116774,
"loss": 2.0441,
"step": 380
},
{
"epoch": 0.25,
"learning_rate": 0.0002754148740012292,
"loss": 2.0578,
"step": 400
},
{
"epoch": 0.26,
"learning_rate": 0.0002741856177012907,
"loss": 2.0634,
"step": 420
},
{
"epoch": 0.27,
"learning_rate": 0.00027295636140135213,
"loss": 1.9869,
"step": 440
},
{
"epoch": 0.28,
"learning_rate": 0.0002717271051014136,
"loss": 2.0628,
"step": 460
},
{
"epoch": 0.3,
"learning_rate": 0.0002704978488014751,
"loss": 2.0732,
"step": 480
},
{
"epoch": 0.31,
"learning_rate": 0.0002692685925015366,
"loss": 2.038,
"step": 500
},
{
"epoch": 0.32,
"learning_rate": 0.000268039336201598,
"loss": 2.0333,
"step": 520
},
{
"epoch": 0.33,
"learning_rate": 0.0002668100799016595,
"loss": 1.97,
"step": 540
},
{
"epoch": 0.34,
"learning_rate": 0.0002655808236017209,
"loss": 2.0097,
"step": 560
},
{
"epoch": 0.36,
"learning_rate": 0.0002643515673017824,
"loss": 1.9377,
"step": 580
},
{
"epoch": 0.37,
"learning_rate": 0.00026312231100184387,
"loss": 2.059,
"step": 600
},
{
"epoch": 0.38,
"learning_rate": 0.00026189305470190535,
"loss": 1.9423,
"step": 620
},
{
"epoch": 0.39,
"learning_rate": 0.0002606637984019668,
"loss": 2.0869,
"step": 640
},
{
"epoch": 0.41,
"learning_rate": 0.00025943454210202826,
"loss": 1.9231,
"step": 660
},
{
"epoch": 0.42,
"learning_rate": 0.0002582052858020897,
"loss": 2.0086,
"step": 680
},
{
"epoch": 0.43,
"learning_rate": 0.00025697602950215117,
"loss": 1.9623,
"step": 700
},
{
"epoch": 0.44,
"learning_rate": 0.00025574677320221265,
"loss": 1.9348,
"step": 720
},
{
"epoch": 0.45,
"learning_rate": 0.0002545175169022741,
"loss": 2.0155,
"step": 740
},
{
"epoch": 0.47,
"learning_rate": 0.00025328826060233556,
"loss": 1.9043,
"step": 760
},
{
"epoch": 0.48,
"learning_rate": 0.00025205900430239704,
"loss": 2.0102,
"step": 780
},
{
"epoch": 0.49,
"learning_rate": 0.0002508297480024585,
"loss": 1.9552,
"step": 800
},
{
"epoch": 0.5,
"learning_rate": 0.00024960049170251995,
"loss": 1.9779,
"step": 820
},
{
"epoch": 0.52,
"learning_rate": 0.0002483712354025814,
"loss": 1.981,
"step": 840
},
{
"epoch": 0.53,
"learning_rate": 0.00024714197910264286,
"loss": 1.9206,
"step": 860
},
{
"epoch": 0.54,
"learning_rate": 0.00024591272280270434,
"loss": 1.9573,
"step": 880
},
{
"epoch": 0.55,
"learning_rate": 0.0002446834665027658,
"loss": 2.0444,
"step": 900
},
{
"epoch": 0.57,
"learning_rate": 0.00024345421020282728,
"loss": 1.8819,
"step": 920
},
{
"epoch": 0.58,
"learning_rate": 0.00024222495390288873,
"loss": 1.9977,
"step": 940
},
{
"epoch": 0.59,
"learning_rate": 0.00024099569760295018,
"loss": 1.946,
"step": 960
},
{
"epoch": 0.6,
"learning_rate": 0.00023976644130301167,
"loss": 1.953,
"step": 980
},
{
"epoch": 0.61,
"learning_rate": 0.00023853718500307312,
"loss": 2.0032,
"step": 1000
},
{
"epoch": 0.63,
"learning_rate": 0.0002373079287031346,
"loss": 1.9271,
"step": 1020
},
{
"epoch": 0.64,
"learning_rate": 0.00023607867240319603,
"loss": 2.0297,
"step": 1040
},
{
"epoch": 0.65,
"learning_rate": 0.0002348494161032575,
"loss": 2.049,
"step": 1060
},
{
"epoch": 0.66,
"learning_rate": 0.00023362015980331896,
"loss": 1.8794,
"step": 1080
},
{
"epoch": 0.68,
"learning_rate": 0.00023239090350338044,
"loss": 1.9826,
"step": 1100
},
{
"epoch": 0.69,
"learning_rate": 0.00023116164720344193,
"loss": 2.0297,
"step": 1120
},
{
"epoch": 0.7,
"learning_rate": 0.00022993239090350335,
"loss": 1.9343,
"step": 1140
},
{
"epoch": 0.71,
"learning_rate": 0.0002287031346035648,
"loss": 1.956,
"step": 1160
},
{
"epoch": 0.73,
"learning_rate": 0.0002274738783036263,
"loss": 1.9653,
"step": 1180
},
{
"epoch": 0.74,
"learning_rate": 0.00022624462200368777,
"loss": 1.9609,
"step": 1200
},
{
"epoch": 0.75,
"learning_rate": 0.00022501536570374922,
"loss": 2.0,
"step": 1220
},
{
"epoch": 0.76,
"learning_rate": 0.00022378610940381065,
"loss": 2.0268,
"step": 1240
},
{
"epoch": 0.77,
"learning_rate": 0.00022255685310387213,
"loss": 2.0342,
"step": 1260
},
{
"epoch": 0.79,
"learning_rate": 0.00022132759680393361,
"loss": 1.9875,
"step": 1280
},
{
"epoch": 0.8,
"learning_rate": 0.00022009834050399507,
"loss": 1.9526,
"step": 1300
},
{
"epoch": 0.81,
"learning_rate": 0.00021886908420405652,
"loss": 1.9678,
"step": 1320
},
{
"epoch": 0.82,
"learning_rate": 0.00021763982790411798,
"loss": 2.1014,
"step": 1340
},
{
"epoch": 0.84,
"learning_rate": 0.00021641057160417946,
"loss": 1.9,
"step": 1360
},
{
"epoch": 0.85,
"learning_rate": 0.0002151813153042409,
"loss": 1.9648,
"step": 1380
},
{
"epoch": 0.86,
"learning_rate": 0.0002139520590043024,
"loss": 2.0172,
"step": 1400
},
{
"epoch": 0.87,
"learning_rate": 0.00021272280270436382,
"loss": 1.9327,
"step": 1420
},
{
"epoch": 0.89,
"learning_rate": 0.0002114935464044253,
"loss": 1.9365,
"step": 1440
},
{
"epoch": 0.9,
"learning_rate": 0.00021026429010448676,
"loss": 1.9678,
"step": 1460
},
{
"epoch": 0.91,
"learning_rate": 0.00020903503380454824,
"loss": 1.9248,
"step": 1480
},
{
"epoch": 0.92,
"learning_rate": 0.0002078057775046097,
"loss": 1.9312,
"step": 1500
},
{
"epoch": 0.93,
"learning_rate": 0.00020657652120467115,
"loss": 1.9107,
"step": 1520
},
{
"epoch": 0.95,
"learning_rate": 0.0002053472649047326,
"loss": 1.9331,
"step": 1540
},
{
"epoch": 0.96,
"learning_rate": 0.00020411800860479408,
"loss": 1.9028,
"step": 1560
},
{
"epoch": 0.97,
"learning_rate": 0.00020288875230485556,
"loss": 1.9537,
"step": 1580
},
{
"epoch": 0.98,
"learning_rate": 0.00020165949600491702,
"loss": 1.9189,
"step": 1600
},
{
"epoch": 1.0,
"learning_rate": 0.00020043023970497845,
"loss": 1.9677,
"step": 1620
},
{
"epoch": 1.01,
"learning_rate": 0.00019920098340503993,
"loss": 1.8744,
"step": 1640
},
{
"epoch": 1.02,
"learning_rate": 0.0001979717271051014,
"loss": 1.9705,
"step": 1660
},
{
"epoch": 1.03,
"learning_rate": 0.00019674247080516286,
"loss": 1.9018,
"step": 1680
},
{
"epoch": 1.04,
"learning_rate": 0.00019551321450522434,
"loss": 1.9697,
"step": 1700
},
{
"epoch": 1.06,
"learning_rate": 0.00019428395820528577,
"loss": 1.9494,
"step": 1720
},
{
"epoch": 1.07,
"learning_rate": 0.00019305470190534725,
"loss": 1.9556,
"step": 1740
},
{
"epoch": 1.08,
"learning_rate": 0.0001918254456054087,
"loss": 2.0141,
"step": 1760
},
{
"epoch": 1.09,
"learning_rate": 0.0001905961893054702,
"loss": 1.9397,
"step": 1780
},
{
"epoch": 1.11,
"learning_rate": 0.00018936693300553164,
"loss": 1.9258,
"step": 1800
},
{
"epoch": 1.12,
"learning_rate": 0.0001881376767055931,
"loss": 1.8207,
"step": 1820
},
{
"epoch": 1.13,
"learning_rate": 0.00018690842040565455,
"loss": 1.9226,
"step": 1840
},
{
"epoch": 1.14,
"learning_rate": 0.00018567916410571603,
"loss": 1.9205,
"step": 1860
},
{
"epoch": 1.16,
"learning_rate": 0.00018444990780577749,
"loss": 1.9983,
"step": 1880
},
{
"epoch": 1.17,
"learning_rate": 0.00018322065150583897,
"loss": 1.9727,
"step": 1900
},
{
"epoch": 1.18,
"learning_rate": 0.0001819913952059004,
"loss": 1.9329,
"step": 1920
},
{
"epoch": 1.19,
"learning_rate": 0.00018076213890596188,
"loss": 2.0107,
"step": 1940
},
{
"epoch": 1.2,
"learning_rate": 0.00017953288260602333,
"loss": 1.9328,
"step": 1960
},
{
"epoch": 1.22,
"learning_rate": 0.0001783036263060848,
"loss": 1.8733,
"step": 1980
},
{
"epoch": 1.23,
"learning_rate": 0.0001770743700061463,
"loss": 1.9073,
"step": 2000
},
{
"epoch": 1.24,
"learning_rate": 0.00017584511370620772,
"loss": 1.9431,
"step": 2020
},
{
"epoch": 1.25,
"learning_rate": 0.0001746158574062692,
"loss": 1.8844,
"step": 2040
},
{
"epoch": 1.27,
"learning_rate": 0.00017338660110633066,
"loss": 1.8902,
"step": 2060
},
{
"epoch": 1.28,
"learning_rate": 0.00017215734480639214,
"loss": 1.8887,
"step": 2080
},
{
"epoch": 1.29,
"learning_rate": 0.00017092808850645356,
"loss": 1.8653,
"step": 2100
},
{
"epoch": 1.3,
"learning_rate": 0.00016969883220651505,
"loss": 1.9092,
"step": 2120
},
{
"epoch": 1.32,
"learning_rate": 0.0001684695759065765,
"loss": 1.9427,
"step": 2140
},
{
"epoch": 1.33,
"learning_rate": 0.00016724031960663798,
"loss": 2.0016,
"step": 2160
},
{
"epoch": 1.34,
"learning_rate": 0.00016601106330669943,
"loss": 1.9192,
"step": 2180
},
{
"epoch": 1.35,
"learning_rate": 0.0001647818070067609,
"loss": 1.8911,
"step": 2200
},
{
"epoch": 1.36,
"learning_rate": 0.00016355255070682234,
"loss": 1.9627,
"step": 2220
},
{
"epoch": 1.38,
"learning_rate": 0.00016232329440688382,
"loss": 2.022,
"step": 2240
},
{
"epoch": 1.39,
"learning_rate": 0.00016109403810694528,
"loss": 1.8829,
"step": 2260
},
{
"epoch": 1.4,
"learning_rate": 0.00015986478180700676,
"loss": 2.0244,
"step": 2280
},
{
"epoch": 1.41,
"learning_rate": 0.0001586355255070682,
"loss": 1.9201,
"step": 2300
},
{
"epoch": 1.43,
"learning_rate": 0.00015740626920712967,
"loss": 1.9803,
"step": 2320
},
{
"epoch": 1.44,
"learning_rate": 0.00015617701290719112,
"loss": 1.9369,
"step": 2340
},
{
"epoch": 1.45,
"learning_rate": 0.0001549477566072526,
"loss": 1.9151,
"step": 2360
},
{
"epoch": 1.46,
"learning_rate": 0.00015371850030731409,
"loss": 1.8436,
"step": 2380
},
{
"epoch": 1.48,
"learning_rate": 0.0001524892440073755,
"loss": 1.9259,
"step": 2400
},
{
"epoch": 1.49,
"learning_rate": 0.00015125998770743697,
"loss": 1.8486,
"step": 2420
},
{
"epoch": 1.5,
"learning_rate": 0.00015003073140749845,
"loss": 1.8839,
"step": 2440
},
{
"epoch": 1.51,
"learning_rate": 0.00014880147510755993,
"loss": 1.917,
"step": 2460
},
{
"epoch": 1.52,
"learning_rate": 0.00014757221880762138,
"loss": 1.9474,
"step": 2480
},
{
"epoch": 1.54,
"learning_rate": 0.00014634296250768284,
"loss": 1.9225,
"step": 2500
},
{
"epoch": 1.55,
"learning_rate": 0.0001451137062077443,
"loss": 1.9257,
"step": 2520
},
{
"epoch": 1.56,
"learning_rate": 0.00014388444990780577,
"loss": 1.9105,
"step": 2540
},
{
"epoch": 1.57,
"learning_rate": 0.00014265519360786723,
"loss": 1.9799,
"step": 2560
},
{
"epoch": 1.59,
"learning_rate": 0.00014142593730792868,
"loss": 1.9632,
"step": 2580
},
{
"epoch": 1.6,
"learning_rate": 0.00014019668100799016,
"loss": 1.888,
"step": 2600
},
{
"epoch": 1.61,
"learning_rate": 0.00013896742470805162,
"loss": 1.9576,
"step": 2620
},
{
"epoch": 1.62,
"learning_rate": 0.00013773816840811307,
"loss": 2.0318,
"step": 2640
},
{
"epoch": 1.63,
"learning_rate": 0.00013650891210817455,
"loss": 1.9347,
"step": 2660
},
{
"epoch": 1.65,
"learning_rate": 0.000135279655808236,
"loss": 1.8473,
"step": 2680
},
{
"epoch": 1.66,
"learning_rate": 0.00013405039950829746,
"loss": 1.9583,
"step": 2700
},
{
"epoch": 1.67,
"learning_rate": 0.00013282114320835892,
"loss": 1.8072,
"step": 2720
},
{
"epoch": 1.68,
"learning_rate": 0.0001315918869084204,
"loss": 1.8624,
"step": 2740
},
{
"epoch": 1.7,
"learning_rate": 0.00013036263060848185,
"loss": 1.9646,
"step": 2760
},
{
"epoch": 1.71,
"learning_rate": 0.0001291333743085433,
"loss": 1.9538,
"step": 2780
},
{
"epoch": 1.72,
"learning_rate": 0.0001279041180086048,
"loss": 1.8872,
"step": 2800
},
{
"epoch": 1.73,
"learning_rate": 0.00012667486170866624,
"loss": 1.9137,
"step": 2820
},
{
"epoch": 1.75,
"learning_rate": 0.00012544560540872772,
"loss": 1.9049,
"step": 2840
},
{
"epoch": 1.76,
"learning_rate": 0.00012421634910878918,
"loss": 1.9567,
"step": 2860
},
{
"epoch": 1.77,
"learning_rate": 0.00012298709280885063,
"loss": 1.9053,
"step": 2880
},
{
"epoch": 1.78,
"learning_rate": 0.0001217578365089121,
"loss": 1.8927,
"step": 2900
},
{
"epoch": 1.79,
"learning_rate": 0.00012052858020897355,
"loss": 1.9475,
"step": 2920
},
{
"epoch": 1.81,
"learning_rate": 0.00011929932390903502,
"loss": 2.0315,
"step": 2940
},
{
"epoch": 1.82,
"learning_rate": 0.00011807006760909648,
"loss": 1.8144,
"step": 2960
},
{
"epoch": 1.83,
"learning_rate": 0.00011684081130915794,
"loss": 1.8562,
"step": 2980
},
{
"epoch": 1.84,
"learning_rate": 0.00011561155500921942,
"loss": 1.925,
"step": 3000
},
{
"epoch": 1.86,
"learning_rate": 0.00011438229870928088,
"loss": 1.9984,
"step": 3020
},
{
"epoch": 1.87,
"learning_rate": 0.00011315304240934235,
"loss": 1.9431,
"step": 3040
},
{
"epoch": 1.88,
"learning_rate": 0.0001119237861094038,
"loss": 1.9025,
"step": 3060
},
{
"epoch": 1.89,
"learning_rate": 0.00011069452980946527,
"loss": 1.9075,
"step": 3080
},
{
"epoch": 1.91,
"learning_rate": 0.00010946527350952672,
"loss": 1.9387,
"step": 3100
},
{
"epoch": 1.92,
"learning_rate": 0.00010823601720958819,
"loss": 1.8919,
"step": 3120
},
{
"epoch": 1.93,
"learning_rate": 0.00010700676090964966,
"loss": 1.9138,
"step": 3140
},
{
"epoch": 1.94,
"learning_rate": 0.00010577750460971111,
"loss": 1.9591,
"step": 3160
},
{
"epoch": 1.95,
"learning_rate": 0.00010454824830977258,
"loss": 1.9794,
"step": 3180
},
{
"epoch": 1.97,
"learning_rate": 0.00010331899200983403,
"loss": 1.9314,
"step": 3200
},
{
"epoch": 1.98,
"learning_rate": 0.0001020897357098955,
"loss": 1.9484,
"step": 3220
},
{
"epoch": 1.99,
"learning_rate": 0.00010086047940995697,
"loss": 1.839,
"step": 3240
},
{
"epoch": 2.0,
"learning_rate": 9.963122311001842e-05,
"loss": 1.8986,
"step": 3260
},
{
"epoch": 2.02,
"learning_rate": 9.840196681007989e-05,
"loss": 1.9466,
"step": 3280
},
{
"epoch": 2.03,
"learning_rate": 9.717271051014135e-05,
"loss": 1.9425,
"step": 3300
},
{
"epoch": 2.04,
"learning_rate": 9.594345421020281e-05,
"loss": 1.8959,
"step": 3320
},
{
"epoch": 2.05,
"learning_rate": 9.471419791026428e-05,
"loss": 1.8946,
"step": 3340
},
{
"epoch": 2.07,
"learning_rate": 9.348494161032574e-05,
"loss": 1.974,
"step": 3360
},
{
"epoch": 2.08,
"learning_rate": 9.225568531038722e-05,
"loss": 1.8717,
"step": 3380
},
{
"epoch": 2.09,
"learning_rate": 9.102642901044866e-05,
"loss": 1.9481,
"step": 3400
},
{
"epoch": 2.1,
"learning_rate": 8.979717271051014e-05,
"loss": 1.931,
"step": 3420
},
{
"epoch": 2.11,
"learning_rate": 8.856791641057161e-05,
"loss": 1.9382,
"step": 3440
},
{
"epoch": 2.13,
"learning_rate": 8.733866011063306e-05,
"loss": 1.9577,
"step": 3460
},
{
"epoch": 2.14,
"learning_rate": 8.610940381069453e-05,
"loss": 1.8826,
"step": 3480
},
{
"epoch": 2.15,
"learning_rate": 8.488014751075598e-05,
"loss": 1.902,
"step": 3500
},
{
"epoch": 2.16,
"learning_rate": 8.365089121081745e-05,
"loss": 1.9147,
"step": 3520
},
{
"epoch": 2.18,
"learning_rate": 8.24216349108789e-05,
"loss": 1.8675,
"step": 3540
},
{
"epoch": 2.19,
"learning_rate": 8.119237861094037e-05,
"loss": 1.877,
"step": 3560
},
{
"epoch": 2.2,
"learning_rate": 7.996312231100184e-05,
"loss": 2.0258,
"step": 3580
},
{
"epoch": 2.21,
"learning_rate": 7.87338660110633e-05,
"loss": 1.9204,
"step": 3600
},
{
"epoch": 2.22,
"learning_rate": 7.750460971112476e-05,
"loss": 1.9073,
"step": 3620
},
{
"epoch": 2.24,
"learning_rate": 7.627535341118622e-05,
"loss": 1.8481,
"step": 3640
},
{
"epoch": 2.25,
"learning_rate": 7.504609711124769e-05,
"loss": 1.896,
"step": 3660
},
{
"epoch": 2.26,
"learning_rate": 7.381684081130915e-05,
"loss": 1.9285,
"step": 3680
},
{
"epoch": 2.27,
"learning_rate": 7.258758451137061e-05,
"loss": 1.8239,
"step": 3700
},
{
"epoch": 2.29,
"learning_rate": 7.135832821143208e-05,
"loss": 1.9289,
"step": 3720
},
{
"epoch": 2.3,
"learning_rate": 7.012907191149354e-05,
"loss": 1.8431,
"step": 3740
},
{
"epoch": 2.31,
"learning_rate": 6.8899815611555e-05,
"loss": 1.9239,
"step": 3760
},
{
"epoch": 2.32,
"learning_rate": 6.767055931161647e-05,
"loss": 1.9132,
"step": 3780
},
{
"epoch": 2.34,
"learning_rate": 6.644130301167793e-05,
"loss": 1.9019,
"step": 3800
},
{
"epoch": 2.35,
"learning_rate": 6.521204671173939e-05,
"loss": 1.9947,
"step": 3820
},
{
"epoch": 2.36,
"learning_rate": 6.398279041180086e-05,
"loss": 1.9146,
"step": 3840
},
{
"epoch": 2.37,
"learning_rate": 6.275353411186232e-05,
"loss": 1.8672,
"step": 3860
},
{
"epoch": 2.38,
"learning_rate": 6.152427781192378e-05,
"loss": 1.9189,
"step": 3880
},
{
"epoch": 2.4,
"learning_rate": 6.0295021511985245e-05,
"loss": 1.8431,
"step": 3900
},
{
"epoch": 2.41,
"learning_rate": 5.9065765212046706e-05,
"loss": 1.9629,
"step": 3920
},
{
"epoch": 2.42,
"learning_rate": 5.7836508912108174e-05,
"loss": 1.8209,
"step": 3940
},
{
"epoch": 2.43,
"learning_rate": 5.6607252612169635e-05,
"loss": 1.9104,
"step": 3960
},
{
"epoch": 2.45,
"learning_rate": 5.5377996312231096e-05,
"loss": 1.8563,
"step": 3980
},
{
"epoch": 2.46,
"learning_rate": 5.414874001229256e-05,
"loss": 1.9091,
"step": 4000
},
{
"epoch": 2.47,
"learning_rate": 5.291948371235402e-05,
"loss": 1.9038,
"step": 4020
},
{
"epoch": 2.48,
"learning_rate": 5.169022741241548e-05,
"loss": 1.9083,
"step": 4040
},
{
"epoch": 2.5,
"learning_rate": 5.046097111247695e-05,
"loss": 1.9037,
"step": 4060
},
{
"epoch": 2.51,
"learning_rate": 4.9231714812538415e-05,
"loss": 1.8808,
"step": 4080
},
{
"epoch": 2.52,
"learning_rate": 4.8002458512599876e-05,
"loss": 1.9638,
"step": 4100
},
{
"epoch": 2.53,
"learning_rate": 4.677320221266134e-05,
"loss": 1.9124,
"step": 4120
},
{
"epoch": 2.54,
"learning_rate": 4.55439459127228e-05,
"loss": 1.9119,
"step": 4140
},
{
"epoch": 2.56,
"learning_rate": 4.4314689612784266e-05,
"loss": 1.8596,
"step": 4160
},
{
"epoch": 2.57,
"learning_rate": 4.3085433312845727e-05,
"loss": 1.8779,
"step": 4180
},
{
"epoch": 2.58,
"learning_rate": 4.185617701290719e-05,
"loss": 1.9442,
"step": 4200
},
{
"epoch": 2.59,
"learning_rate": 4.062692071296865e-05,
"loss": 1.8899,
"step": 4220
},
{
"epoch": 2.61,
"learning_rate": 3.939766441303011e-05,
"loss": 1.8925,
"step": 4240
},
{
"epoch": 2.62,
"learning_rate": 3.816840811309157e-05,
"loss": 1.9306,
"step": 4260
},
{
"epoch": 2.63,
"learning_rate": 3.693915181315304e-05,
"loss": 1.988,
"step": 4280
},
{
"epoch": 2.64,
"learning_rate": 3.5709895513214506e-05,
"loss": 1.8516,
"step": 4300
},
{
"epoch": 2.66,
"learning_rate": 3.448063921327597e-05,
"loss": 1.9874,
"step": 4320
},
{
"epoch": 2.67,
"learning_rate": 3.325138291333743e-05,
"loss": 1.9059,
"step": 4340
},
{
"epoch": 2.68,
"learning_rate": 3.202212661339889e-05,
"loss": 1.9219,
"step": 4360
},
{
"epoch": 2.69,
"learning_rate": 3.079287031346035e-05,
"loss": 1.8807,
"step": 4380
},
{
"epoch": 2.7,
"learning_rate": 2.9563614013521815e-05,
"loss": 1.842,
"step": 4400
},
{
"epoch": 2.72,
"learning_rate": 2.833435771358328e-05,
"loss": 1.9281,
"step": 4420
},
{
"epoch": 2.73,
"learning_rate": 2.7105101413644744e-05,
"loss": 1.8918,
"step": 4440
},
{
"epoch": 2.74,
"learning_rate": 2.5875845113706208e-05,
"loss": 1.8154,
"step": 4460
},
{
"epoch": 2.75,
"learning_rate": 2.464658881376767e-05,
"loss": 1.8775,
"step": 4480
},
{
"epoch": 2.77,
"learning_rate": 2.341733251382913e-05,
"loss": 1.9259,
"step": 4500
},
{
"epoch": 2.78,
"learning_rate": 2.2188076213890594e-05,
"loss": 1.8731,
"step": 4520
},
{
"epoch": 2.79,
"learning_rate": 2.095881991395206e-05,
"loss": 1.916,
"step": 4540
},
{
"epoch": 2.8,
"learning_rate": 1.972956361401352e-05,
"loss": 1.8511,
"step": 4560
},
{
"epoch": 2.81,
"learning_rate": 1.850030731407498e-05,
"loss": 1.8983,
"step": 4580
},
{
"epoch": 2.83,
"learning_rate": 1.7271051014136445e-05,
"loss": 1.8668,
"step": 4600
},
{
"epoch": 2.84,
"learning_rate": 1.604179471419791e-05,
"loss": 1.8947,
"step": 4620
},
{
"epoch": 2.85,
"learning_rate": 1.4812538414259372e-05,
"loss": 1.9118,
"step": 4640
},
{
"epoch": 2.86,
"learning_rate": 1.3583282114320835e-05,
"loss": 1.9495,
"step": 4660
},
{
"epoch": 2.88,
"learning_rate": 1.2354025814382298e-05,
"loss": 1.9175,
"step": 4680
},
{
"epoch": 2.89,
"learning_rate": 1.112476951444376e-05,
"loss": 1.9496,
"step": 4700
},
{
"epoch": 2.9,
"learning_rate": 9.895513214505223e-06,
"loss": 1.9861,
"step": 4720
},
{
"epoch": 2.91,
"learning_rate": 8.666256914566686e-06,
"loss": 1.8718,
"step": 4740
},
{
"epoch": 2.93,
"learning_rate": 7.4370006146281495e-06,
"loss": 1.88,
"step": 4760
},
{
"epoch": 2.94,
"learning_rate": 6.207744314689612e-06,
"loss": 1.8669,
"step": 4780
},
{
"epoch": 2.95,
"learning_rate": 4.978488014751076e-06,
"loss": 1.8853,
"step": 4800
},
{
"epoch": 2.96,
"learning_rate": 3.749231714812538e-06,
"loss": 1.958,
"step": 4820
},
{
"epoch": 2.97,
"learning_rate": 2.519975414874001e-06,
"loss": 1.8813,
"step": 4840
},
{
"epoch": 2.99,
"learning_rate": 1.2907191149354641e-06,
"loss": 1.8783,
"step": 4860
},
{
"epoch": 3.0,
"learning_rate": 6.146281499692685e-08,
"loss": 1.9501,
"step": 4880
},
{
"epoch": 3.0,
"eval_loss": 1.9423085451126099,
"eval_runtime": 128.9524,
"eval_samples_per_second": 15.51,
"eval_steps_per_second": 1.939,
"step": 4881
}
],
"max_steps": 4881,
"num_train_epochs": 3,
"total_flos": 4.564905941532672e+16,
"trial_name": null,
"trial_params": null
}