QingyiSi's picture
Upload 1268 files
4697198
raw
history blame
95.3 kB
{
"best_metric": 0.7123447060585022,
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-belle13b/checkpoint-13400",
"epoch": 2.8910463861920173,
"global_step": 13400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.9999999999999995e-05,
"loss": 1.6589,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00011999999999999999,
"loss": 1.4071,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00017999999999999998,
"loss": 1.044,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 0.00023999999999999998,
"loss": 0.9883,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 0.0003,
"loss": 0.9659,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 0.00029956537486417964,
"loss": 0.9505,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 0.00029913074972835925,
"loss": 0.9205,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 0.0002986961245925389,
"loss": 0.9168,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 0.0002982614994567186,
"loss": 0.9117,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 0.0002978268743208982,
"loss": 0.9064,
"step": 200
},
{
"epoch": 0.04,
"eval_loss": 0.9033477306365967,
"eval_runtime": 25.3136,
"eval_samples_per_second": 79.009,
"eval_steps_per_second": 1.264,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 0.00029739224918507785,
"loss": 0.8981,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 0.0002969576240492575,
"loss": 0.8912,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 0.0002965229989134371,
"loss": 0.8875,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 0.0002960883737776168,
"loss": 0.8907,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 0.00029565374864179645,
"loss": 0.8753,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 0.00029521912350597606,
"loss": 0.8782,
"step": 320
},
{
"epoch": 0.07,
"learning_rate": 0.0002947844983701557,
"loss": 0.8697,
"step": 340
},
{
"epoch": 0.08,
"learning_rate": 0.0002943498732343354,
"loss": 0.8745,
"step": 360
},
{
"epoch": 0.08,
"learning_rate": 0.000293915248098515,
"loss": 0.8725,
"step": 380
},
{
"epoch": 0.09,
"learning_rate": 0.00029348062296269466,
"loss": 0.8658,
"step": 400
},
{
"epoch": 0.09,
"eval_loss": 0.8655584454536438,
"eval_runtime": 25.3343,
"eval_samples_per_second": 78.944,
"eval_steps_per_second": 1.263,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 0.0002930459978268743,
"loss": 0.8641,
"step": 420
},
{
"epoch": 0.09,
"learning_rate": 0.00029261137269105393,
"loss": 0.8509,
"step": 440
},
{
"epoch": 0.1,
"learning_rate": 0.0002921767475552336,
"loss": 0.8541,
"step": 460
},
{
"epoch": 0.1,
"learning_rate": 0.00029174212241941326,
"loss": 0.8575,
"step": 480
},
{
"epoch": 0.11,
"learning_rate": 0.00029130749728359287,
"loss": 0.8482,
"step": 500
},
{
"epoch": 0.11,
"learning_rate": 0.00029087287214777253,
"loss": 0.8572,
"step": 520
},
{
"epoch": 0.12,
"learning_rate": 0.0002904382470119522,
"loss": 0.8489,
"step": 540
},
{
"epoch": 0.12,
"learning_rate": 0.0002900036218761318,
"loss": 0.8585,
"step": 560
},
{
"epoch": 0.13,
"learning_rate": 0.00028956899674031147,
"loss": 0.8387,
"step": 580
},
{
"epoch": 0.13,
"learning_rate": 0.00028913437160449113,
"loss": 0.8306,
"step": 600
},
{
"epoch": 0.13,
"eval_loss": 0.8434031009674072,
"eval_runtime": 25.3211,
"eval_samples_per_second": 78.986,
"eval_steps_per_second": 1.264,
"step": 600
},
{
"epoch": 0.13,
"learning_rate": 0.00028869974646867074,
"loss": 0.8331,
"step": 620
},
{
"epoch": 0.14,
"learning_rate": 0.0002882651213328504,
"loss": 0.8447,
"step": 640
},
{
"epoch": 0.14,
"learning_rate": 0.00028783049619703007,
"loss": 0.836,
"step": 660
},
{
"epoch": 0.15,
"learning_rate": 0.0002873958710612097,
"loss": 0.8436,
"step": 680
},
{
"epoch": 0.15,
"learning_rate": 0.00028696124592538934,
"loss": 0.8281,
"step": 700
},
{
"epoch": 0.16,
"learning_rate": 0.000286526620789569,
"loss": 0.8378,
"step": 720
},
{
"epoch": 0.16,
"learning_rate": 0.0002860919956537486,
"loss": 0.8338,
"step": 740
},
{
"epoch": 0.16,
"learning_rate": 0.0002856573705179283,
"loss": 0.8323,
"step": 760
},
{
"epoch": 0.17,
"learning_rate": 0.00028522274538210794,
"loss": 0.8153,
"step": 780
},
{
"epoch": 0.17,
"learning_rate": 0.00028478812024628755,
"loss": 0.8349,
"step": 800
},
{
"epoch": 0.17,
"eval_loss": 0.8282934427261353,
"eval_runtime": 25.4025,
"eval_samples_per_second": 78.733,
"eval_steps_per_second": 1.26,
"step": 800
},
{
"epoch": 0.18,
"learning_rate": 0.0002843534951104672,
"loss": 0.8198,
"step": 820
},
{
"epoch": 0.18,
"learning_rate": 0.0002839188699746469,
"loss": 0.8254,
"step": 840
},
{
"epoch": 0.19,
"learning_rate": 0.0002834842448388265,
"loss": 0.8165,
"step": 860
},
{
"epoch": 0.19,
"learning_rate": 0.00028304961970300615,
"loss": 0.8241,
"step": 880
},
{
"epoch": 0.19,
"learning_rate": 0.0002826149945671858,
"loss": 0.814,
"step": 900
},
{
"epoch": 0.2,
"learning_rate": 0.0002821803694313654,
"loss": 0.8222,
"step": 920
},
{
"epoch": 0.2,
"learning_rate": 0.0002817457442955451,
"loss": 0.825,
"step": 940
},
{
"epoch": 0.21,
"learning_rate": 0.00028131111915972475,
"loss": 0.8153,
"step": 960
},
{
"epoch": 0.21,
"learning_rate": 0.00028087649402390436,
"loss": 0.8229,
"step": 980
},
{
"epoch": 0.22,
"learning_rate": 0.00028044186888808397,
"loss": 0.8129,
"step": 1000
},
{
"epoch": 0.22,
"eval_loss": 0.816320538520813,
"eval_runtime": 25.4153,
"eval_samples_per_second": 78.693,
"eval_steps_per_second": 1.259,
"step": 1000
},
{
"epoch": 0.22,
"learning_rate": 0.00028000724375226363,
"loss": 0.8121,
"step": 1020
},
{
"epoch": 0.22,
"learning_rate": 0.0002795726186164433,
"loss": 0.8063,
"step": 1040
},
{
"epoch": 0.23,
"learning_rate": 0.0002791379934806229,
"loss": 0.8097,
"step": 1060
},
{
"epoch": 0.23,
"learning_rate": 0.00027870336834480257,
"loss": 0.8142,
"step": 1080
},
{
"epoch": 0.24,
"learning_rate": 0.00027826874320898223,
"loss": 0.8021,
"step": 1100
},
{
"epoch": 0.24,
"learning_rate": 0.00027783411807316184,
"loss": 0.8014,
"step": 1120
},
{
"epoch": 0.25,
"learning_rate": 0.0002773994929373415,
"loss": 0.8031,
"step": 1140
},
{
"epoch": 0.25,
"learning_rate": 0.00027696486780152117,
"loss": 0.8011,
"step": 1160
},
{
"epoch": 0.25,
"learning_rate": 0.0002765302426657008,
"loss": 0.7944,
"step": 1180
},
{
"epoch": 0.26,
"learning_rate": 0.00027609561752988044,
"loss": 0.8071,
"step": 1200
},
{
"epoch": 0.26,
"eval_loss": 0.8064733147621155,
"eval_runtime": 25.3901,
"eval_samples_per_second": 78.771,
"eval_steps_per_second": 1.26,
"step": 1200
},
{
"epoch": 0.26,
"learning_rate": 0.0002756609923940601,
"loss": 0.8025,
"step": 1220
},
{
"epoch": 0.27,
"learning_rate": 0.0002752263672582397,
"loss": 0.7954,
"step": 1240
},
{
"epoch": 0.27,
"learning_rate": 0.0002747917421224194,
"loss": 0.8013,
"step": 1260
},
{
"epoch": 0.28,
"learning_rate": 0.00027435711698659904,
"loss": 0.7967,
"step": 1280
},
{
"epoch": 0.28,
"learning_rate": 0.00027392249185077865,
"loss": 0.8132,
"step": 1300
},
{
"epoch": 0.28,
"learning_rate": 0.0002734878667149583,
"loss": 0.8017,
"step": 1320
},
{
"epoch": 0.29,
"learning_rate": 0.000273053241579138,
"loss": 0.7964,
"step": 1340
},
{
"epoch": 0.29,
"learning_rate": 0.0002726186164433176,
"loss": 0.8012,
"step": 1360
},
{
"epoch": 0.3,
"learning_rate": 0.00027218399130749725,
"loss": 0.7982,
"step": 1380
},
{
"epoch": 0.3,
"learning_rate": 0.0002717493661716769,
"loss": 0.8031,
"step": 1400
},
{
"epoch": 0.3,
"eval_loss": 0.798474133014679,
"eval_runtime": 25.432,
"eval_samples_per_second": 78.641,
"eval_steps_per_second": 1.258,
"step": 1400
},
{
"epoch": 0.31,
"learning_rate": 0.0002713147410358565,
"loss": 0.7925,
"step": 1420
},
{
"epoch": 0.31,
"learning_rate": 0.0002708801159000362,
"loss": 0.794,
"step": 1440
},
{
"epoch": 0.31,
"learning_rate": 0.00027044549076421585,
"loss": 0.804,
"step": 1460
},
{
"epoch": 0.32,
"learning_rate": 0.00027001086562839546,
"loss": 0.7942,
"step": 1480
},
{
"epoch": 0.32,
"learning_rate": 0.0002695762404925751,
"loss": 0.7872,
"step": 1500
},
{
"epoch": 0.33,
"learning_rate": 0.0002691416153567548,
"loss": 0.7962,
"step": 1520
},
{
"epoch": 0.33,
"learning_rate": 0.0002687069902209344,
"loss": 0.7898,
"step": 1540
},
{
"epoch": 0.34,
"learning_rate": 0.00026827236508511406,
"loss": 0.7886,
"step": 1560
},
{
"epoch": 0.34,
"learning_rate": 0.0002678377399492937,
"loss": 0.7904,
"step": 1580
},
{
"epoch": 0.35,
"learning_rate": 0.00026740311481347333,
"loss": 0.7892,
"step": 1600
},
{
"epoch": 0.35,
"eval_loss": 0.7912269234657288,
"eval_runtime": 25.444,
"eval_samples_per_second": 78.604,
"eval_steps_per_second": 1.258,
"step": 1600
},
{
"epoch": 0.35,
"learning_rate": 0.000266968489677653,
"loss": 0.7897,
"step": 1620
},
{
"epoch": 0.35,
"learning_rate": 0.00026653386454183266,
"loss": 0.7927,
"step": 1640
},
{
"epoch": 0.36,
"learning_rate": 0.00026609923940601227,
"loss": 0.7829,
"step": 1660
},
{
"epoch": 0.36,
"learning_rate": 0.00026566461427019193,
"loss": 0.7788,
"step": 1680
},
{
"epoch": 0.37,
"learning_rate": 0.0002652299891343716,
"loss": 0.786,
"step": 1700
},
{
"epoch": 0.37,
"learning_rate": 0.0002647953639985512,
"loss": 0.7828,
"step": 1720
},
{
"epoch": 0.38,
"learning_rate": 0.00026436073886273087,
"loss": 0.7788,
"step": 1740
},
{
"epoch": 0.38,
"learning_rate": 0.00026392611372691053,
"loss": 0.7851,
"step": 1760
},
{
"epoch": 0.38,
"learning_rate": 0.00026349148859109014,
"loss": 0.7936,
"step": 1780
},
{
"epoch": 0.39,
"learning_rate": 0.0002630568634552698,
"loss": 0.7758,
"step": 1800
},
{
"epoch": 0.39,
"eval_loss": 0.7854430675506592,
"eval_runtime": 25.4734,
"eval_samples_per_second": 78.513,
"eval_steps_per_second": 1.256,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 0.00026262223831944947,
"loss": 0.787,
"step": 1820
},
{
"epoch": 0.4,
"learning_rate": 0.0002621876131836291,
"loss": 0.7779,
"step": 1840
},
{
"epoch": 0.4,
"learning_rate": 0.00026175298804780874,
"loss": 0.7792,
"step": 1860
},
{
"epoch": 0.41,
"learning_rate": 0.0002613183629119884,
"loss": 0.7728,
"step": 1880
},
{
"epoch": 0.41,
"learning_rate": 0.000260883737776168,
"loss": 0.7844,
"step": 1900
},
{
"epoch": 0.41,
"learning_rate": 0.0002604491126403477,
"loss": 0.7726,
"step": 1920
},
{
"epoch": 0.42,
"learning_rate": 0.00026001448750452734,
"loss": 0.7706,
"step": 1940
},
{
"epoch": 0.42,
"learning_rate": 0.00025957986236870695,
"loss": 0.7659,
"step": 1960
},
{
"epoch": 0.43,
"learning_rate": 0.0002591452372328866,
"loss": 0.7808,
"step": 1980
},
{
"epoch": 0.43,
"learning_rate": 0.0002587106120970663,
"loss": 0.7692,
"step": 2000
},
{
"epoch": 0.43,
"eval_loss": 0.7800412774085999,
"eval_runtime": 25.5146,
"eval_samples_per_second": 78.387,
"eval_steps_per_second": 1.254,
"step": 2000
},
{
"epoch": 0.44,
"learning_rate": 0.0002582759869612459,
"loss": 0.7665,
"step": 2020
},
{
"epoch": 0.44,
"learning_rate": 0.00025784136182542555,
"loss": 0.7795,
"step": 2040
},
{
"epoch": 0.44,
"learning_rate": 0.0002574067366896052,
"loss": 0.7846,
"step": 2060
},
{
"epoch": 0.45,
"learning_rate": 0.0002569721115537848,
"loss": 0.7639,
"step": 2080
},
{
"epoch": 0.45,
"learning_rate": 0.0002565374864179645,
"loss": 0.7827,
"step": 2100
},
{
"epoch": 0.46,
"learning_rate": 0.00025610286128214415,
"loss": 0.7751,
"step": 2120
},
{
"epoch": 0.46,
"learning_rate": 0.00025566823614632376,
"loss": 0.776,
"step": 2140
},
{
"epoch": 0.47,
"learning_rate": 0.0002552336110105034,
"loss": 0.7773,
"step": 2160
},
{
"epoch": 0.47,
"learning_rate": 0.0002547989858746831,
"loss": 0.7757,
"step": 2180
},
{
"epoch": 0.47,
"learning_rate": 0.0002543643607388627,
"loss": 0.7769,
"step": 2200
},
{
"epoch": 0.47,
"eval_loss": 0.7759379744529724,
"eval_runtime": 25.4789,
"eval_samples_per_second": 78.496,
"eval_steps_per_second": 1.256,
"step": 2200
},
{
"epoch": 0.48,
"learning_rate": 0.00025392973560304236,
"loss": 0.7657,
"step": 2220
},
{
"epoch": 0.48,
"learning_rate": 0.000253495110467222,
"loss": 0.7664,
"step": 2240
},
{
"epoch": 0.49,
"learning_rate": 0.00025306048533140163,
"loss": 0.7774,
"step": 2260
},
{
"epoch": 0.49,
"learning_rate": 0.0002526258601955813,
"loss": 0.7591,
"step": 2280
},
{
"epoch": 0.5,
"learning_rate": 0.00025219123505976096,
"loss": 0.7605,
"step": 2300
},
{
"epoch": 0.5,
"learning_rate": 0.00025175660992394057,
"loss": 0.7693,
"step": 2320
},
{
"epoch": 0.5,
"learning_rate": 0.00025132198478812023,
"loss": 0.7702,
"step": 2340
},
{
"epoch": 0.51,
"learning_rate": 0.0002508873596522999,
"loss": 0.7706,
"step": 2360
},
{
"epoch": 0.51,
"learning_rate": 0.0002504527345164795,
"loss": 0.7664,
"step": 2380
},
{
"epoch": 0.52,
"learning_rate": 0.00025001810938065917,
"loss": 0.76,
"step": 2400
},
{
"epoch": 0.52,
"eval_loss": 0.7723669409751892,
"eval_runtime": 25.4827,
"eval_samples_per_second": 78.485,
"eval_steps_per_second": 1.256,
"step": 2400
},
{
"epoch": 0.52,
"learning_rate": 0.00024958348424483883,
"loss": 0.7702,
"step": 2420
},
{
"epoch": 0.53,
"learning_rate": 0.00024914885910901844,
"loss": 0.7686,
"step": 2440
},
{
"epoch": 0.53,
"learning_rate": 0.0002487142339731981,
"loss": 0.762,
"step": 2460
},
{
"epoch": 0.54,
"learning_rate": 0.00024827960883737777,
"loss": 0.7719,
"step": 2480
},
{
"epoch": 0.54,
"learning_rate": 0.0002478449837015574,
"loss": 0.7612,
"step": 2500
},
{
"epoch": 0.54,
"learning_rate": 0.00024741035856573704,
"loss": 0.7565,
"step": 2520
},
{
"epoch": 0.55,
"learning_rate": 0.0002469757334299167,
"loss": 0.7719,
"step": 2540
},
{
"epoch": 0.55,
"learning_rate": 0.0002465411082940963,
"loss": 0.7619,
"step": 2560
},
{
"epoch": 0.56,
"learning_rate": 0.000246106483158276,
"loss": 0.7607,
"step": 2580
},
{
"epoch": 0.56,
"learning_rate": 0.00024567185802245564,
"loss": 0.7564,
"step": 2600
},
{
"epoch": 0.56,
"eval_loss": 0.7678729295730591,
"eval_runtime": 25.4455,
"eval_samples_per_second": 78.599,
"eval_steps_per_second": 1.258,
"step": 2600
},
{
"epoch": 0.57,
"learning_rate": 0.00024523723288663525,
"loss": 0.7613,
"step": 2620
},
{
"epoch": 0.57,
"learning_rate": 0.0002448026077508149,
"loss": 0.7525,
"step": 2640
},
{
"epoch": 0.57,
"learning_rate": 0.0002443679826149946,
"loss": 0.7563,
"step": 2660
},
{
"epoch": 0.58,
"learning_rate": 0.00024393335747917422,
"loss": 0.7601,
"step": 2680
},
{
"epoch": 0.58,
"learning_rate": 0.00024349873234335383,
"loss": 0.7633,
"step": 2700
},
{
"epoch": 0.59,
"learning_rate": 0.00024306410720753346,
"loss": 0.75,
"step": 2720
},
{
"epoch": 0.59,
"learning_rate": 0.0002426294820717131,
"loss": 0.7602,
"step": 2740
},
{
"epoch": 0.6,
"learning_rate": 0.00024219485693589276,
"loss": 0.7546,
"step": 2760
},
{
"epoch": 0.6,
"learning_rate": 0.0002417602318000724,
"loss": 0.7532,
"step": 2780
},
{
"epoch": 0.6,
"learning_rate": 0.00024132560666425203,
"loss": 0.7661,
"step": 2800
},
{
"epoch": 0.6,
"eval_loss": 0.7649803757667542,
"eval_runtime": 25.4783,
"eval_samples_per_second": 78.498,
"eval_steps_per_second": 1.256,
"step": 2800
},
{
"epoch": 0.61,
"learning_rate": 0.0002408909815284317,
"loss": 0.7587,
"step": 2820
},
{
"epoch": 0.61,
"learning_rate": 0.00024045635639261133,
"loss": 0.7543,
"step": 2840
},
{
"epoch": 0.62,
"learning_rate": 0.00024002173125679097,
"loss": 0.7672,
"step": 2860
},
{
"epoch": 0.62,
"learning_rate": 0.00023958710612097063,
"loss": 0.7623,
"step": 2880
},
{
"epoch": 0.63,
"learning_rate": 0.00023915248098515027,
"loss": 0.7487,
"step": 2900
},
{
"epoch": 0.63,
"learning_rate": 0.0002387178558493299,
"loss": 0.75,
"step": 2920
},
{
"epoch": 0.63,
"learning_rate": 0.00023828323071350957,
"loss": 0.7567,
"step": 2940
},
{
"epoch": 0.64,
"learning_rate": 0.0002378486055776892,
"loss": 0.7592,
"step": 2960
},
{
"epoch": 0.64,
"learning_rate": 0.00023741398044186884,
"loss": 0.7569,
"step": 2980
},
{
"epoch": 0.65,
"learning_rate": 0.0002369793553060485,
"loss": 0.7524,
"step": 3000
},
{
"epoch": 0.65,
"eval_loss": 0.7613279819488525,
"eval_runtime": 25.4837,
"eval_samples_per_second": 78.482,
"eval_steps_per_second": 1.256,
"step": 3000
},
{
"epoch": 0.65,
"learning_rate": 0.00023654473017022814,
"loss": 0.7593,
"step": 3020
},
{
"epoch": 0.66,
"learning_rate": 0.00023611010503440778,
"loss": 0.7516,
"step": 3040
},
{
"epoch": 0.66,
"learning_rate": 0.00023567547989858744,
"loss": 0.7525,
"step": 3060
},
{
"epoch": 0.66,
"learning_rate": 0.00023524085476276708,
"loss": 0.7583,
"step": 3080
},
{
"epoch": 0.67,
"learning_rate": 0.00023480622962694672,
"loss": 0.7535,
"step": 3100
},
{
"epoch": 0.67,
"learning_rate": 0.00023437160449112638,
"loss": 0.7528,
"step": 3120
},
{
"epoch": 0.68,
"learning_rate": 0.00023393697935530602,
"loss": 0.7418,
"step": 3140
},
{
"epoch": 0.68,
"learning_rate": 0.00023350235421948565,
"loss": 0.7496,
"step": 3160
},
{
"epoch": 0.69,
"learning_rate": 0.00023306772908366532,
"loss": 0.7537,
"step": 3180
},
{
"epoch": 0.69,
"learning_rate": 0.00023263310394784495,
"loss": 0.7569,
"step": 3200
},
{
"epoch": 0.69,
"eval_loss": 0.7581906914710999,
"eval_runtime": 25.4588,
"eval_samples_per_second": 78.558,
"eval_steps_per_second": 1.257,
"step": 3200
},
{
"epoch": 0.69,
"learning_rate": 0.0002321984788120246,
"loss": 0.7465,
"step": 3220
},
{
"epoch": 0.7,
"learning_rate": 0.00023176385367620425,
"loss": 0.7367,
"step": 3240
},
{
"epoch": 0.7,
"learning_rate": 0.0002313292285403839,
"loss": 0.7425,
"step": 3260
},
{
"epoch": 0.71,
"learning_rate": 0.00023089460340456353,
"loss": 0.7637,
"step": 3280
},
{
"epoch": 0.71,
"learning_rate": 0.0002304599782687432,
"loss": 0.7574,
"step": 3300
},
{
"epoch": 0.72,
"learning_rate": 0.00023002535313292283,
"loss": 0.7448,
"step": 3320
},
{
"epoch": 0.72,
"learning_rate": 0.00022959072799710246,
"loss": 0.7595,
"step": 3340
},
{
"epoch": 0.72,
"learning_rate": 0.00022915610286128213,
"loss": 0.7465,
"step": 3360
},
{
"epoch": 0.73,
"learning_rate": 0.00022872147772546176,
"loss": 0.7532,
"step": 3380
},
{
"epoch": 0.73,
"learning_rate": 0.0002282868525896414,
"loss": 0.7466,
"step": 3400
},
{
"epoch": 0.73,
"eval_loss": 0.7559078931808472,
"eval_runtime": 25.464,
"eval_samples_per_second": 78.542,
"eval_steps_per_second": 1.257,
"step": 3400
},
{
"epoch": 0.74,
"learning_rate": 0.00022785222745382106,
"loss": 0.753,
"step": 3420
},
{
"epoch": 0.74,
"learning_rate": 0.0002274176023180007,
"loss": 0.7459,
"step": 3440
},
{
"epoch": 0.75,
"learning_rate": 0.00022698297718218034,
"loss": 0.7519,
"step": 3460
},
{
"epoch": 0.75,
"learning_rate": 0.00022654835204636,
"loss": 0.7451,
"step": 3480
},
{
"epoch": 0.76,
"learning_rate": 0.00022611372691053964,
"loss": 0.7468,
"step": 3500
},
{
"epoch": 0.76,
"learning_rate": 0.00022567910177471927,
"loss": 0.7491,
"step": 3520
},
{
"epoch": 0.76,
"learning_rate": 0.00022524447663889894,
"loss": 0.7524,
"step": 3540
},
{
"epoch": 0.77,
"learning_rate": 0.00022480985150307857,
"loss": 0.7484,
"step": 3560
},
{
"epoch": 0.77,
"learning_rate": 0.0002243752263672582,
"loss": 0.7484,
"step": 3580
},
{
"epoch": 0.78,
"learning_rate": 0.00022394060123143787,
"loss": 0.7529,
"step": 3600
},
{
"epoch": 0.78,
"eval_loss": 0.7531791925430298,
"eval_runtime": 25.4572,
"eval_samples_per_second": 78.563,
"eval_steps_per_second": 1.257,
"step": 3600
},
{
"epoch": 0.78,
"learning_rate": 0.0002235059760956175,
"loss": 0.7475,
"step": 3620
},
{
"epoch": 0.79,
"learning_rate": 0.00022307135095979715,
"loss": 0.7518,
"step": 3640
},
{
"epoch": 0.79,
"learning_rate": 0.0002226367258239768,
"loss": 0.751,
"step": 3660
},
{
"epoch": 0.79,
"learning_rate": 0.00022220210068815645,
"loss": 0.7402,
"step": 3680
},
{
"epoch": 0.8,
"learning_rate": 0.00022176747555233608,
"loss": 0.755,
"step": 3700
},
{
"epoch": 0.8,
"learning_rate": 0.00022133285041651575,
"loss": 0.7441,
"step": 3720
},
{
"epoch": 0.81,
"learning_rate": 0.00022089822528069538,
"loss": 0.746,
"step": 3740
},
{
"epoch": 0.81,
"learning_rate": 0.00022046360014487502,
"loss": 0.7441,
"step": 3760
},
{
"epoch": 0.82,
"learning_rate": 0.00022002897500905468,
"loss": 0.7475,
"step": 3780
},
{
"epoch": 0.82,
"learning_rate": 0.00021959434987323432,
"loss": 0.7458,
"step": 3800
},
{
"epoch": 0.82,
"eval_loss": 0.7513870596885681,
"eval_runtime": 25.4906,
"eval_samples_per_second": 78.46,
"eval_steps_per_second": 1.255,
"step": 3800
},
{
"epoch": 0.82,
"learning_rate": 0.00021915972473741396,
"loss": 0.7436,
"step": 3820
},
{
"epoch": 0.83,
"learning_rate": 0.00021872509960159362,
"loss": 0.7451,
"step": 3840
},
{
"epoch": 0.83,
"learning_rate": 0.00021829047446577326,
"loss": 0.7475,
"step": 3860
},
{
"epoch": 0.84,
"learning_rate": 0.0002178558493299529,
"loss": 0.7424,
"step": 3880
},
{
"epoch": 0.84,
"learning_rate": 0.00021742122419413256,
"loss": 0.7503,
"step": 3900
},
{
"epoch": 0.85,
"learning_rate": 0.0002169865990583122,
"loss": 0.7334,
"step": 3920
},
{
"epoch": 0.85,
"learning_rate": 0.00021655197392249183,
"loss": 0.7436,
"step": 3940
},
{
"epoch": 0.85,
"learning_rate": 0.0002161173487866715,
"loss": 0.7453,
"step": 3960
},
{
"epoch": 0.86,
"learning_rate": 0.00021568272365085113,
"loss": 0.7424,
"step": 3980
},
{
"epoch": 0.86,
"learning_rate": 0.00021524809851503076,
"loss": 0.7509,
"step": 4000
},
{
"epoch": 0.86,
"eval_loss": 0.7488968968391418,
"eval_runtime": 25.492,
"eval_samples_per_second": 78.456,
"eval_steps_per_second": 1.255,
"step": 4000
},
{
"epoch": 0.87,
"learning_rate": 0.00021481347337921043,
"loss": 0.7445,
"step": 4020
},
{
"epoch": 0.87,
"learning_rate": 0.00021437884824339006,
"loss": 0.74,
"step": 4040
},
{
"epoch": 0.88,
"learning_rate": 0.0002139442231075697,
"loss": 0.7362,
"step": 4060
},
{
"epoch": 0.88,
"learning_rate": 0.00021350959797174936,
"loss": 0.7409,
"step": 4080
},
{
"epoch": 0.88,
"learning_rate": 0.000213074972835929,
"loss": 0.7315,
"step": 4100
},
{
"epoch": 0.89,
"learning_rate": 0.00021264034770010864,
"loss": 0.7488,
"step": 4120
},
{
"epoch": 0.89,
"learning_rate": 0.0002122057225642883,
"loss": 0.7375,
"step": 4140
},
{
"epoch": 0.9,
"learning_rate": 0.00021177109742846794,
"loss": 0.7481,
"step": 4160
},
{
"epoch": 0.9,
"learning_rate": 0.00021133647229264757,
"loss": 0.7524,
"step": 4180
},
{
"epoch": 0.91,
"learning_rate": 0.00021092357841361823,
"loss": 0.7403,
"step": 4200
},
{
"epoch": 0.91,
"eval_loss": 0.7469983100891113,
"eval_runtime": 25.4847,
"eval_samples_per_second": 78.479,
"eval_steps_per_second": 1.256,
"step": 4200
},
{
"epoch": 0.91,
"learning_rate": 0.00021048895327779787,
"loss": 0.7394,
"step": 4220
},
{
"epoch": 0.91,
"learning_rate": 0.0002100543281419775,
"loss": 0.7405,
"step": 4240
},
{
"epoch": 0.92,
"learning_rate": 0.00020961970300615717,
"loss": 0.7534,
"step": 4260
},
{
"epoch": 0.92,
"learning_rate": 0.0002091850778703368,
"loss": 0.7412,
"step": 4280
},
{
"epoch": 0.93,
"learning_rate": 0.00020875045273451644,
"loss": 0.7393,
"step": 4300
},
{
"epoch": 0.93,
"learning_rate": 0.0002083158275986961,
"loss": 0.7289,
"step": 4320
},
{
"epoch": 0.94,
"learning_rate": 0.00020788120246287574,
"loss": 0.7342,
"step": 4340
},
{
"epoch": 0.94,
"learning_rate": 0.00020744657732705538,
"loss": 0.7427,
"step": 4360
},
{
"epoch": 0.94,
"learning_rate": 0.00020701195219123504,
"loss": 0.7386,
"step": 4380
},
{
"epoch": 0.95,
"learning_rate": 0.00020657732705541468,
"loss": 0.7374,
"step": 4400
},
{
"epoch": 0.95,
"eval_loss": 0.7451291680335999,
"eval_runtime": 25.461,
"eval_samples_per_second": 78.552,
"eval_steps_per_second": 1.257,
"step": 4400
},
{
"epoch": 0.95,
"learning_rate": 0.0002061427019195943,
"loss": 0.7364,
"step": 4420
},
{
"epoch": 0.96,
"learning_rate": 0.00020570807678377398,
"loss": 0.7377,
"step": 4440
},
{
"epoch": 0.96,
"learning_rate": 0.0002052734516479536,
"loss": 0.7391,
"step": 4460
},
{
"epoch": 0.97,
"learning_rate": 0.00020483882651213325,
"loss": 0.731,
"step": 4480
},
{
"epoch": 0.97,
"learning_rate": 0.0002044042013763129,
"loss": 0.735,
"step": 4500
},
{
"epoch": 0.98,
"learning_rate": 0.00020396957624049255,
"loss": 0.7344,
"step": 4520
},
{
"epoch": 0.98,
"learning_rate": 0.00020353495110467219,
"loss": 0.7355,
"step": 4540
},
{
"epoch": 0.98,
"learning_rate": 0.00020310032596885185,
"loss": 0.7357,
"step": 4560
},
{
"epoch": 0.99,
"learning_rate": 0.00020266570083303149,
"loss": 0.7377,
"step": 4580
},
{
"epoch": 0.99,
"learning_rate": 0.00020223107569721112,
"loss": 0.7438,
"step": 4600
},
{
"epoch": 0.99,
"eval_loss": 0.7437875270843506,
"eval_runtime": 25.5255,
"eval_samples_per_second": 78.353,
"eval_steps_per_second": 1.254,
"step": 4600
},
{
"epoch": 1.0,
"learning_rate": 0.00020179645056139079,
"loss": 0.7343,
"step": 4620
},
{
"epoch": 1.0,
"learning_rate": 0.00020136182542557042,
"loss": 0.7473,
"step": 4640
},
{
"epoch": 1.01,
"learning_rate": 0.00020092720028975006,
"loss": 0.7305,
"step": 4660
},
{
"epoch": 1.01,
"learning_rate": 0.00020049257515392972,
"loss": 0.7284,
"step": 4680
},
{
"epoch": 1.01,
"learning_rate": 0.00020005795001810936,
"loss": 0.7335,
"step": 4700
},
{
"epoch": 1.02,
"learning_rate": 0.000199623324882289,
"loss": 0.7282,
"step": 4720
},
{
"epoch": 1.02,
"learning_rate": 0.00019918869974646866,
"loss": 0.7337,
"step": 4740
},
{
"epoch": 1.03,
"learning_rate": 0.0001987540746106483,
"loss": 0.7195,
"step": 4760
},
{
"epoch": 1.03,
"learning_rate": 0.00019831944947482793,
"loss": 0.7327,
"step": 4780
},
{
"epoch": 1.04,
"learning_rate": 0.0001978848243390076,
"loss": 0.7259,
"step": 4800
},
{
"epoch": 1.04,
"eval_loss": 0.7413464188575745,
"eval_runtime": 25.4959,
"eval_samples_per_second": 78.444,
"eval_steps_per_second": 1.255,
"step": 4800
},
{
"epoch": 1.04,
"learning_rate": 0.00019745019920318723,
"loss": 0.7263,
"step": 4820
},
{
"epoch": 1.04,
"learning_rate": 0.00019701557406736687,
"loss": 0.7341,
"step": 4840
},
{
"epoch": 1.05,
"learning_rate": 0.00019658094893154653,
"loss": 0.7406,
"step": 4860
},
{
"epoch": 1.05,
"learning_rate": 0.00019614632379572617,
"loss": 0.7309,
"step": 4880
},
{
"epoch": 1.06,
"learning_rate": 0.0001957116986599058,
"loss": 0.7274,
"step": 4900
},
{
"epoch": 1.06,
"learning_rate": 0.00019527707352408547,
"loss": 0.7241,
"step": 4920
},
{
"epoch": 1.07,
"learning_rate": 0.0001948424483882651,
"loss": 0.7368,
"step": 4940
},
{
"epoch": 1.07,
"learning_rate": 0.00019440782325244474,
"loss": 0.7445,
"step": 4960
},
{
"epoch": 1.07,
"learning_rate": 0.0001939731981166244,
"loss": 0.7347,
"step": 4980
},
{
"epoch": 1.08,
"learning_rate": 0.00019353857298080404,
"loss": 0.7436,
"step": 5000
},
{
"epoch": 1.08,
"eval_loss": 0.7399871945381165,
"eval_runtime": 25.5032,
"eval_samples_per_second": 78.422,
"eval_steps_per_second": 1.255,
"step": 5000
},
{
"epoch": 1.08,
"learning_rate": 0.00019310394784498368,
"loss": 0.7248,
"step": 5020
},
{
"epoch": 1.09,
"learning_rate": 0.00019266932270916334,
"loss": 0.7374,
"step": 5040
},
{
"epoch": 1.09,
"learning_rate": 0.00019223469757334298,
"loss": 0.7187,
"step": 5060
},
{
"epoch": 1.1,
"learning_rate": 0.00019180007243752261,
"loss": 0.7381,
"step": 5080
},
{
"epoch": 1.1,
"learning_rate": 0.00019136544730170228,
"loss": 0.7389,
"step": 5100
},
{
"epoch": 1.1,
"learning_rate": 0.00019093082216588191,
"loss": 0.7343,
"step": 5120
},
{
"epoch": 1.11,
"learning_rate": 0.00019049619703006155,
"loss": 0.7323,
"step": 5140
},
{
"epoch": 1.11,
"learning_rate": 0.00019006157189424121,
"loss": 0.723,
"step": 5160
},
{
"epoch": 1.12,
"learning_rate": 0.00018962694675842085,
"loss": 0.7236,
"step": 5180
},
{
"epoch": 1.12,
"learning_rate": 0.0001891923216226005,
"loss": 0.7399,
"step": 5200
},
{
"epoch": 1.12,
"eval_loss": 0.7393975257873535,
"eval_runtime": 25.6137,
"eval_samples_per_second": 78.083,
"eval_steps_per_second": 1.249,
"step": 5200
},
{
"epoch": 1.13,
"learning_rate": 0.00018875769648678015,
"loss": 0.7373,
"step": 5220
},
{
"epoch": 1.13,
"learning_rate": 0.0001883230713509598,
"loss": 0.7257,
"step": 5240
},
{
"epoch": 1.13,
"learning_rate": 0.00018788844621513942,
"loss": 0.7261,
"step": 5260
},
{
"epoch": 1.14,
"learning_rate": 0.0001874538210793191,
"loss": 0.7302,
"step": 5280
},
{
"epoch": 1.14,
"learning_rate": 0.00018701919594349872,
"loss": 0.7337,
"step": 5300
},
{
"epoch": 1.15,
"learning_rate": 0.00018658457080767836,
"loss": 0.7237,
"step": 5320
},
{
"epoch": 1.15,
"learning_rate": 0.00018614994567185802,
"loss": 0.7238,
"step": 5340
},
{
"epoch": 1.16,
"learning_rate": 0.00018571532053603766,
"loss": 0.7287,
"step": 5360
},
{
"epoch": 1.16,
"learning_rate": 0.0001852806954002173,
"loss": 0.7237,
"step": 5380
},
{
"epoch": 1.17,
"learning_rate": 0.00018484607026439696,
"loss": 0.7256,
"step": 5400
},
{
"epoch": 1.17,
"eval_loss": 0.7377527952194214,
"eval_runtime": 25.4964,
"eval_samples_per_second": 78.442,
"eval_steps_per_second": 1.255,
"step": 5400
},
{
"epoch": 1.17,
"learning_rate": 0.0001844114451285766,
"loss": 0.7279,
"step": 5420
},
{
"epoch": 1.17,
"learning_rate": 0.00018397681999275623,
"loss": 0.7226,
"step": 5440
},
{
"epoch": 1.18,
"learning_rate": 0.0001835421948569359,
"loss": 0.7167,
"step": 5460
},
{
"epoch": 1.18,
"learning_rate": 0.00018310756972111553,
"loss": 0.7268,
"step": 5480
},
{
"epoch": 1.19,
"learning_rate": 0.00018267294458529517,
"loss": 0.7398,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 0.00018223831944947483,
"loss": 0.7331,
"step": 5520
},
{
"epoch": 1.2,
"learning_rate": 0.00018180369431365447,
"loss": 0.7372,
"step": 5540
},
{
"epoch": 1.2,
"learning_rate": 0.0001813690691778341,
"loss": 0.7321,
"step": 5560
},
{
"epoch": 1.2,
"learning_rate": 0.00018093444404201377,
"loss": 0.7346,
"step": 5580
},
{
"epoch": 1.21,
"learning_rate": 0.0001804998189061934,
"loss": 0.722,
"step": 5600
},
{
"epoch": 1.21,
"eval_loss": 0.7368175983428955,
"eval_runtime": 25.5045,
"eval_samples_per_second": 78.417,
"eval_steps_per_second": 1.255,
"step": 5600
},
{
"epoch": 1.21,
"learning_rate": 0.00018006519377037304,
"loss": 0.7279,
"step": 5620
},
{
"epoch": 1.22,
"learning_rate": 0.0001796305686345527,
"loss": 0.72,
"step": 5640
},
{
"epoch": 1.22,
"learning_rate": 0.00017919594349873234,
"loss": 0.7295,
"step": 5660
},
{
"epoch": 1.23,
"learning_rate": 0.00017876131836291198,
"loss": 0.7245,
"step": 5680
},
{
"epoch": 1.23,
"learning_rate": 0.00017832669322709164,
"loss": 0.7418,
"step": 5700
},
{
"epoch": 1.23,
"learning_rate": 0.00017789206809127128,
"loss": 0.7317,
"step": 5720
},
{
"epoch": 1.24,
"learning_rate": 0.00017745744295545092,
"loss": 0.7303,
"step": 5740
},
{
"epoch": 1.24,
"learning_rate": 0.00017702281781963058,
"loss": 0.7332,
"step": 5760
},
{
"epoch": 1.25,
"learning_rate": 0.00017658819268381022,
"loss": 0.7202,
"step": 5780
},
{
"epoch": 1.25,
"learning_rate": 0.00017615356754798983,
"loss": 0.7238,
"step": 5800
},
{
"epoch": 1.25,
"eval_loss": 0.7348505854606628,
"eval_runtime": 25.509,
"eval_samples_per_second": 78.404,
"eval_steps_per_second": 1.254,
"step": 5800
},
{
"epoch": 1.26,
"learning_rate": 0.00017571894241216946,
"loss": 0.724,
"step": 5820
},
{
"epoch": 1.26,
"learning_rate": 0.00017528431727634913,
"loss": 0.7258,
"step": 5840
},
{
"epoch": 1.26,
"learning_rate": 0.00017484969214052876,
"loss": 0.7217,
"step": 5860
},
{
"epoch": 1.27,
"learning_rate": 0.0001744150670047084,
"loss": 0.7209,
"step": 5880
},
{
"epoch": 1.27,
"learning_rate": 0.00017398044186888806,
"loss": 0.7276,
"step": 5900
},
{
"epoch": 1.28,
"learning_rate": 0.0001735458167330677,
"loss": 0.7287,
"step": 5920
},
{
"epoch": 1.28,
"learning_rate": 0.00017311119159724733,
"loss": 0.7244,
"step": 5940
},
{
"epoch": 1.29,
"learning_rate": 0.000172676566461427,
"loss": 0.7247,
"step": 5960
},
{
"epoch": 1.29,
"learning_rate": 0.00017224194132560663,
"loss": 0.7191,
"step": 5980
},
{
"epoch": 1.29,
"learning_rate": 0.00017180731618978627,
"loss": 0.7208,
"step": 6000
},
{
"epoch": 1.29,
"eval_loss": 0.7340711951255798,
"eval_runtime": 25.4669,
"eval_samples_per_second": 78.533,
"eval_steps_per_second": 1.257,
"step": 6000
},
{
"epoch": 1.3,
"learning_rate": 0.00017137269105396593,
"loss": 0.7285,
"step": 6020
},
{
"epoch": 1.3,
"learning_rate": 0.00017093806591814557,
"loss": 0.7294,
"step": 6040
},
{
"epoch": 1.31,
"learning_rate": 0.0001705034407823252,
"loss": 0.7365,
"step": 6060
},
{
"epoch": 1.31,
"learning_rate": 0.00017006881564650487,
"loss": 0.7149,
"step": 6080
},
{
"epoch": 1.32,
"learning_rate": 0.0001696341905106845,
"loss": 0.7229,
"step": 6100
},
{
"epoch": 1.32,
"learning_rate": 0.00016919956537486414,
"loss": 0.7253,
"step": 6120
},
{
"epoch": 1.32,
"learning_rate": 0.0001687649402390438,
"loss": 0.7188,
"step": 6140
},
{
"epoch": 1.33,
"learning_rate": 0.00016833031510322344,
"loss": 0.7308,
"step": 6160
},
{
"epoch": 1.33,
"learning_rate": 0.00016789568996740308,
"loss": 0.7186,
"step": 6180
},
{
"epoch": 1.34,
"learning_rate": 0.00016746106483158274,
"loss": 0.7121,
"step": 6200
},
{
"epoch": 1.34,
"eval_loss": 0.7324739694595337,
"eval_runtime": 25.5,
"eval_samples_per_second": 78.431,
"eval_steps_per_second": 1.255,
"step": 6200
},
{
"epoch": 1.34,
"learning_rate": 0.00016702643969576238,
"loss": 0.7286,
"step": 6220
},
{
"epoch": 1.35,
"learning_rate": 0.00016659181455994202,
"loss": 0.7246,
"step": 6240
},
{
"epoch": 1.35,
"learning_rate": 0.00016615718942412168,
"loss": 0.7234,
"step": 6260
},
{
"epoch": 1.35,
"learning_rate": 0.00016572256428830132,
"loss": 0.7245,
"step": 6280
},
{
"epoch": 1.36,
"learning_rate": 0.00016528793915248095,
"loss": 0.7252,
"step": 6300
},
{
"epoch": 1.36,
"learning_rate": 0.00016485331401666062,
"loss": 0.7259,
"step": 6320
},
{
"epoch": 1.37,
"learning_rate": 0.00016441868888084025,
"loss": 0.7173,
"step": 6340
},
{
"epoch": 1.37,
"learning_rate": 0.0001639840637450199,
"loss": 0.7222,
"step": 6360
},
{
"epoch": 1.38,
"learning_rate": 0.00016354943860919955,
"loss": 0.7113,
"step": 6380
},
{
"epoch": 1.38,
"learning_rate": 0.0001631148134733792,
"loss": 0.72,
"step": 6400
},
{
"epoch": 1.38,
"eval_loss": 0.7319995164871216,
"eval_runtime": 25.5112,
"eval_samples_per_second": 78.397,
"eval_steps_per_second": 1.254,
"step": 6400
},
{
"epoch": 1.39,
"learning_rate": 0.00016268018833755883,
"loss": 0.7333,
"step": 6420
},
{
"epoch": 1.39,
"learning_rate": 0.0001622455632017385,
"loss": 0.7208,
"step": 6440
},
{
"epoch": 1.39,
"learning_rate": 0.00016181093806591813,
"loss": 0.7161,
"step": 6460
},
{
"epoch": 1.4,
"learning_rate": 0.00016137631293009776,
"loss": 0.7171,
"step": 6480
},
{
"epoch": 1.4,
"learning_rate": 0.00016094168779427743,
"loss": 0.7297,
"step": 6500
},
{
"epoch": 1.41,
"learning_rate": 0.00016050706265845706,
"loss": 0.7156,
"step": 6520
},
{
"epoch": 1.41,
"learning_rate": 0.0001600724375226367,
"loss": 0.7175,
"step": 6540
},
{
"epoch": 1.42,
"learning_rate": 0.00015963781238681636,
"loss": 0.7152,
"step": 6560
},
{
"epoch": 1.42,
"learning_rate": 0.000159203187250996,
"loss": 0.7282,
"step": 6580
},
{
"epoch": 1.42,
"learning_rate": 0.00015876856211517564,
"loss": 0.722,
"step": 6600
},
{
"epoch": 1.42,
"eval_loss": 0.7307416796684265,
"eval_runtime": 25.4967,
"eval_samples_per_second": 78.442,
"eval_steps_per_second": 1.255,
"step": 6600
},
{
"epoch": 1.43,
"learning_rate": 0.0001583339369793553,
"loss": 0.7274,
"step": 6620
},
{
"epoch": 1.43,
"learning_rate": 0.00015789931184353494,
"loss": 0.7313,
"step": 6640
},
{
"epoch": 1.44,
"learning_rate": 0.00015746468670771457,
"loss": 0.7209,
"step": 6660
},
{
"epoch": 1.44,
"learning_rate": 0.00015703006157189424,
"loss": 0.7202,
"step": 6680
},
{
"epoch": 1.45,
"learning_rate": 0.00015659543643607387,
"loss": 0.7264,
"step": 6700
},
{
"epoch": 1.45,
"learning_rate": 0.0001561608113002535,
"loss": 0.7226,
"step": 6720
},
{
"epoch": 1.45,
"learning_rate": 0.00015572618616443317,
"loss": 0.711,
"step": 6740
},
{
"epoch": 1.46,
"learning_rate": 0.0001552915610286128,
"loss": 0.7216,
"step": 6760
},
{
"epoch": 1.46,
"learning_rate": 0.00015485693589279245,
"loss": 0.7184,
"step": 6780
},
{
"epoch": 1.47,
"learning_rate": 0.0001544223107569721,
"loss": 0.7216,
"step": 6800
},
{
"epoch": 1.47,
"eval_loss": 0.7297094464302063,
"eval_runtime": 25.4826,
"eval_samples_per_second": 78.485,
"eval_steps_per_second": 1.256,
"step": 6800
},
{
"epoch": 1.47,
"learning_rate": 0.00015398768562115175,
"loss": 0.7203,
"step": 6820
},
{
"epoch": 1.48,
"learning_rate": 0.00015355306048533138,
"loss": 0.7184,
"step": 6840
},
{
"epoch": 1.48,
"learning_rate": 0.00015311843534951105,
"loss": 0.7183,
"step": 6860
},
{
"epoch": 1.48,
"learning_rate": 0.00015268381021369068,
"loss": 0.7267,
"step": 6880
},
{
"epoch": 1.49,
"learning_rate": 0.00015224918507787032,
"loss": 0.7299,
"step": 6900
},
{
"epoch": 1.49,
"learning_rate": 0.00015181455994204998,
"loss": 0.719,
"step": 6920
},
{
"epoch": 1.5,
"learning_rate": 0.00015137993480622962,
"loss": 0.7229,
"step": 6940
},
{
"epoch": 1.5,
"learning_rate": 0.00015094530967040926,
"loss": 0.7231,
"step": 6960
},
{
"epoch": 1.51,
"learning_rate": 0.00015051068453458892,
"loss": 0.7279,
"step": 6980
},
{
"epoch": 1.51,
"learning_rate": 0.00015007605939876856,
"loss": 0.7252,
"step": 7000
},
{
"epoch": 1.51,
"eval_loss": 0.7288112640380859,
"eval_runtime": 25.4887,
"eval_samples_per_second": 78.466,
"eval_steps_per_second": 1.255,
"step": 7000
},
{
"epoch": 1.51,
"learning_rate": 0.0001496414342629482,
"loss": 0.7148,
"step": 7020
},
{
"epoch": 1.52,
"learning_rate": 0.00014920680912712786,
"loss": 0.7147,
"step": 7040
},
{
"epoch": 1.52,
"learning_rate": 0.0001487721839913075,
"loss": 0.7209,
"step": 7060
},
{
"epoch": 1.53,
"learning_rate": 0.00014833755885548713,
"loss": 0.724,
"step": 7080
},
{
"epoch": 1.53,
"learning_rate": 0.00014790293371966676,
"loss": 0.7256,
"step": 7100
},
{
"epoch": 1.54,
"learning_rate": 0.0001474683085838464,
"loss": 0.7246,
"step": 7120
},
{
"epoch": 1.54,
"learning_rate": 0.00014703368344802606,
"loss": 0.7103,
"step": 7140
},
{
"epoch": 1.54,
"learning_rate": 0.0001465990583122057,
"loss": 0.7223,
"step": 7160
},
{
"epoch": 1.55,
"learning_rate": 0.00014616443317638534,
"loss": 0.7149,
"step": 7180
},
{
"epoch": 1.55,
"learning_rate": 0.000145729808040565,
"loss": 0.7214,
"step": 7200
},
{
"epoch": 1.55,
"eval_loss": 0.7280930876731873,
"eval_runtime": 25.4883,
"eval_samples_per_second": 78.467,
"eval_steps_per_second": 1.255,
"step": 7200
},
{
"epoch": 1.56,
"learning_rate": 0.00014529518290474464,
"loss": 0.7118,
"step": 7220
},
{
"epoch": 1.56,
"learning_rate": 0.00014486055776892427,
"loss": 0.7171,
"step": 7240
},
{
"epoch": 1.57,
"learning_rate": 0.00014442593263310394,
"loss": 0.7191,
"step": 7260
},
{
"epoch": 1.57,
"learning_rate": 0.00014399130749728357,
"loss": 0.7155,
"step": 7280
},
{
"epoch": 1.57,
"learning_rate": 0.0001435566823614632,
"loss": 0.7198,
"step": 7300
},
{
"epoch": 1.58,
"learning_rate": 0.00014312205722564287,
"loss": 0.7188,
"step": 7320
},
{
"epoch": 1.58,
"learning_rate": 0.0001426874320898225,
"loss": 0.7236,
"step": 7340
},
{
"epoch": 1.59,
"learning_rate": 0.00014225280695400215,
"loss": 0.712,
"step": 7360
},
{
"epoch": 1.59,
"learning_rate": 0.0001418181818181818,
"loss": 0.7181,
"step": 7380
},
{
"epoch": 1.6,
"learning_rate": 0.00014138355668236145,
"loss": 0.7198,
"step": 7400
},
{
"epoch": 1.6,
"eval_loss": 0.7276077270507812,
"eval_runtime": 25.4843,
"eval_samples_per_second": 78.48,
"eval_steps_per_second": 1.256,
"step": 7400
},
{
"epoch": 1.6,
"learning_rate": 0.00014094893154654108,
"loss": 0.7187,
"step": 7420
},
{
"epoch": 1.61,
"learning_rate": 0.00014051430641072075,
"loss": 0.7153,
"step": 7440
},
{
"epoch": 1.61,
"learning_rate": 0.00014007968127490038,
"loss": 0.7208,
"step": 7460
},
{
"epoch": 1.61,
"learning_rate": 0.00013964505613908002,
"loss": 0.7153,
"step": 7480
},
{
"epoch": 1.62,
"learning_rate": 0.00013921043100325968,
"loss": 0.7207,
"step": 7500
},
{
"epoch": 1.62,
"learning_rate": 0.00013877580586743932,
"loss": 0.7167,
"step": 7520
},
{
"epoch": 1.63,
"learning_rate": 0.00013834118073161896,
"loss": 0.7183,
"step": 7540
},
{
"epoch": 1.63,
"learning_rate": 0.00013792828685258964,
"loss": 0.7196,
"step": 7560
},
{
"epoch": 1.64,
"learning_rate": 0.00013749366171676928,
"loss": 0.7233,
"step": 7580
},
{
"epoch": 1.64,
"learning_rate": 0.00013705903658094894,
"loss": 0.7237,
"step": 7600
},
{
"epoch": 1.64,
"eval_loss": 0.7260885238647461,
"eval_runtime": 25.503,
"eval_samples_per_second": 78.422,
"eval_steps_per_second": 1.255,
"step": 7600
},
{
"epoch": 1.64,
"learning_rate": 0.00013662441144512855,
"loss": 0.72,
"step": 7620
},
{
"epoch": 1.65,
"learning_rate": 0.0001361897863093082,
"loss": 0.7094,
"step": 7640
},
{
"epoch": 1.65,
"learning_rate": 0.00013575516117348785,
"loss": 0.7111,
"step": 7660
},
{
"epoch": 1.66,
"learning_rate": 0.00013532053603766749,
"loss": 0.7182,
"step": 7680
},
{
"epoch": 1.66,
"learning_rate": 0.00013488591090184715,
"loss": 0.7182,
"step": 7700
},
{
"epoch": 1.67,
"learning_rate": 0.00013445128576602679,
"loss": 0.7183,
"step": 7720
},
{
"epoch": 1.67,
"learning_rate": 0.00013401666063020642,
"loss": 0.7112,
"step": 7740
},
{
"epoch": 1.67,
"learning_rate": 0.00013358203549438609,
"loss": 0.7183,
"step": 7760
},
{
"epoch": 1.68,
"learning_rate": 0.00013314741035856572,
"loss": 0.7152,
"step": 7780
},
{
"epoch": 1.68,
"learning_rate": 0.00013271278522274536,
"loss": 0.7233,
"step": 7800
},
{
"epoch": 1.68,
"eval_loss": 0.7252987027168274,
"eval_runtime": 25.5066,
"eval_samples_per_second": 78.411,
"eval_steps_per_second": 1.255,
"step": 7800
},
{
"epoch": 1.69,
"learning_rate": 0.00013227816008692502,
"loss": 0.7124,
"step": 7820
},
{
"epoch": 1.69,
"learning_rate": 0.00013184353495110466,
"loss": 0.7109,
"step": 7840
},
{
"epoch": 1.7,
"learning_rate": 0.0001314089098152843,
"loss": 0.7132,
"step": 7860
},
{
"epoch": 1.7,
"learning_rate": 0.00013097428467946396,
"loss": 0.7157,
"step": 7880
},
{
"epoch": 1.7,
"learning_rate": 0.0001305396595436436,
"loss": 0.7237,
"step": 7900
},
{
"epoch": 1.71,
"learning_rate": 0.00013010503440782323,
"loss": 0.7176,
"step": 7920
},
{
"epoch": 1.71,
"learning_rate": 0.0001296704092720029,
"loss": 0.7199,
"step": 7940
},
{
"epoch": 1.72,
"learning_rate": 0.00012923578413618253,
"loss": 0.7119,
"step": 7960
},
{
"epoch": 1.72,
"learning_rate": 0.00012880115900036217,
"loss": 0.717,
"step": 7980
},
{
"epoch": 1.73,
"learning_rate": 0.00012836653386454183,
"loss": 0.7155,
"step": 8000
},
{
"epoch": 1.73,
"eval_loss": 0.7248360514640808,
"eval_runtime": 25.5301,
"eval_samples_per_second": 78.339,
"eval_steps_per_second": 1.253,
"step": 8000
},
{
"epoch": 1.73,
"learning_rate": 0.00012793190872872147,
"loss": 0.7085,
"step": 8020
},
{
"epoch": 1.73,
"learning_rate": 0.0001274972835929011,
"loss": 0.7174,
"step": 8040
},
{
"epoch": 1.74,
"learning_rate": 0.00012706265845708077,
"loss": 0.7224,
"step": 8060
},
{
"epoch": 1.74,
"learning_rate": 0.0001266280333212604,
"loss": 0.7169,
"step": 8080
},
{
"epoch": 1.75,
"learning_rate": 0.00012619340818544004,
"loss": 0.7191,
"step": 8100
},
{
"epoch": 1.75,
"learning_rate": 0.0001257587830496197,
"loss": 0.7179,
"step": 8120
},
{
"epoch": 1.76,
"learning_rate": 0.00012532415791379934,
"loss": 0.7208,
"step": 8140
},
{
"epoch": 1.76,
"learning_rate": 0.00012488953277797898,
"loss": 0.7168,
"step": 8160
},
{
"epoch": 1.76,
"learning_rate": 0.00012445490764215864,
"loss": 0.7101,
"step": 8180
},
{
"epoch": 1.77,
"learning_rate": 0.00012402028250633828,
"loss": 0.7167,
"step": 8200
},
{
"epoch": 1.77,
"eval_loss": 0.7242170572280884,
"eval_runtime": 25.4873,
"eval_samples_per_second": 78.47,
"eval_steps_per_second": 1.256,
"step": 8200
},
{
"epoch": 1.77,
"learning_rate": 0.00012358565737051791,
"loss": 0.7062,
"step": 8220
},
{
"epoch": 1.78,
"learning_rate": 0.00012315103223469758,
"loss": 0.7177,
"step": 8240
},
{
"epoch": 1.78,
"learning_rate": 0.00012271640709887721,
"loss": 0.7035,
"step": 8260
},
{
"epoch": 1.79,
"learning_rate": 0.00012228178196305685,
"loss": 0.7157,
"step": 8280
},
{
"epoch": 1.79,
"learning_rate": 0.0001218471568272365,
"loss": 0.7196,
"step": 8300
},
{
"epoch": 1.8,
"learning_rate": 0.00012141253169141615,
"loss": 0.7105,
"step": 8320
},
{
"epoch": 1.8,
"learning_rate": 0.00012097790655559579,
"loss": 0.7105,
"step": 8340
},
{
"epoch": 1.8,
"learning_rate": 0.00012054328141977544,
"loss": 0.7139,
"step": 8360
},
{
"epoch": 1.81,
"learning_rate": 0.00012010865628395509,
"loss": 0.7215,
"step": 8380
},
{
"epoch": 1.81,
"learning_rate": 0.00011967403114813472,
"loss": 0.725,
"step": 8400
},
{
"epoch": 1.81,
"eval_loss": 0.7237139344215393,
"eval_runtime": 25.506,
"eval_samples_per_second": 78.413,
"eval_steps_per_second": 1.255,
"step": 8400
},
{
"epoch": 1.82,
"learning_rate": 0.00011923940601231437,
"loss": 0.7107,
"step": 8420
},
{
"epoch": 1.82,
"learning_rate": 0.00011880478087649402,
"loss": 0.7095,
"step": 8440
},
{
"epoch": 1.83,
"learning_rate": 0.00011837015574067366,
"loss": 0.7061,
"step": 8460
},
{
"epoch": 1.83,
"learning_rate": 0.0001179355306048533,
"loss": 0.716,
"step": 8480
},
{
"epoch": 1.83,
"learning_rate": 0.00011750090546903295,
"loss": 0.7203,
"step": 8500
},
{
"epoch": 1.84,
"learning_rate": 0.00011706628033321258,
"loss": 0.7098,
"step": 8520
},
{
"epoch": 1.84,
"learning_rate": 0.00011663165519739223,
"loss": 0.7104,
"step": 8540
},
{
"epoch": 1.85,
"learning_rate": 0.00011619703006157188,
"loss": 0.7051,
"step": 8560
},
{
"epoch": 1.85,
"learning_rate": 0.00011576240492575152,
"loss": 0.7198,
"step": 8580
},
{
"epoch": 1.86,
"learning_rate": 0.00011532777978993117,
"loss": 0.7175,
"step": 8600
},
{
"epoch": 1.86,
"eval_loss": 0.7230754494667053,
"eval_runtime": 25.5133,
"eval_samples_per_second": 78.39,
"eval_steps_per_second": 1.254,
"step": 8600
},
{
"epoch": 1.86,
"learning_rate": 0.00011489315465411082,
"loss": 0.7046,
"step": 8620
},
{
"epoch": 1.86,
"learning_rate": 0.00011445852951829046,
"loss": 0.7176,
"step": 8640
},
{
"epoch": 1.87,
"learning_rate": 0.0001140239043824701,
"loss": 0.7193,
"step": 8660
},
{
"epoch": 1.87,
"learning_rate": 0.00011358927924664976,
"loss": 0.7046,
"step": 8680
},
{
"epoch": 1.88,
"learning_rate": 0.00011315465411082939,
"loss": 0.7116,
"step": 8700
},
{
"epoch": 1.88,
"learning_rate": 0.00011274176023180006,
"loss": 0.7152,
"step": 8720
},
{
"epoch": 1.89,
"learning_rate": 0.00011230713509597971,
"loss": 0.7164,
"step": 8740
},
{
"epoch": 1.89,
"learning_rate": 0.00011187250996015936,
"loss": 0.7192,
"step": 8760
},
{
"epoch": 1.89,
"learning_rate": 0.000111437884824339,
"loss": 0.7124,
"step": 8780
},
{
"epoch": 1.9,
"learning_rate": 0.00011100325968851865,
"loss": 0.7032,
"step": 8800
},
{
"epoch": 1.9,
"eval_loss": 0.7217770218849182,
"eval_runtime": 25.4723,
"eval_samples_per_second": 78.517,
"eval_steps_per_second": 1.256,
"step": 8800
},
{
"epoch": 1.9,
"learning_rate": 0.0001105686345526983,
"loss": 0.7157,
"step": 8820
},
{
"epoch": 1.91,
"learning_rate": 0.00011013400941687794,
"loss": 0.7115,
"step": 8840
},
{
"epoch": 1.91,
"learning_rate": 0.00010969938428105759,
"loss": 0.7137,
"step": 8860
},
{
"epoch": 1.92,
"learning_rate": 0.00010926475914523724,
"loss": 0.7176,
"step": 8880
},
{
"epoch": 1.92,
"learning_rate": 0.00010883013400941687,
"loss": 0.7081,
"step": 8900
},
{
"epoch": 1.92,
"learning_rate": 0.00010839550887359652,
"loss": 0.7233,
"step": 8920
},
{
"epoch": 1.93,
"learning_rate": 0.00010796088373777617,
"loss": 0.7058,
"step": 8940
},
{
"epoch": 1.93,
"learning_rate": 0.00010752625860195581,
"loss": 0.7154,
"step": 8960
},
{
"epoch": 1.94,
"learning_rate": 0.00010709163346613546,
"loss": 0.7135,
"step": 8980
},
{
"epoch": 1.94,
"learning_rate": 0.00010665700833031508,
"loss": 0.7078,
"step": 9000
},
{
"epoch": 1.94,
"eval_loss": 0.7215875387191772,
"eval_runtime": 25.484,
"eval_samples_per_second": 78.481,
"eval_steps_per_second": 1.256,
"step": 9000
},
{
"epoch": 1.95,
"learning_rate": 0.00010622238319449473,
"loss": 0.7061,
"step": 9020
},
{
"epoch": 1.95,
"learning_rate": 0.00010578775805867438,
"loss": 0.7174,
"step": 9040
},
{
"epoch": 1.95,
"learning_rate": 0.00010535313292285402,
"loss": 0.7132,
"step": 9060
},
{
"epoch": 1.96,
"learning_rate": 0.00010491850778703367,
"loss": 0.7247,
"step": 9080
},
{
"epoch": 1.96,
"learning_rate": 0.00010448388265121332,
"loss": 0.7064,
"step": 9100
},
{
"epoch": 1.97,
"learning_rate": 0.00010404925751539295,
"loss": 0.7098,
"step": 9120
},
{
"epoch": 1.97,
"learning_rate": 0.0001036146323795726,
"loss": 0.708,
"step": 9140
},
{
"epoch": 1.98,
"learning_rate": 0.00010318000724375225,
"loss": 0.7144,
"step": 9160
},
{
"epoch": 1.98,
"learning_rate": 0.00010274538210793189,
"loss": 0.7151,
"step": 9180
},
{
"epoch": 1.98,
"learning_rate": 0.00010231075697211154,
"loss": 0.718,
"step": 9200
},
{
"epoch": 1.98,
"eval_loss": 0.7208251357078552,
"eval_runtime": 25.5022,
"eval_samples_per_second": 78.425,
"eval_steps_per_second": 1.255,
"step": 9200
},
{
"epoch": 1.99,
"learning_rate": 0.00010187613183629119,
"loss": 0.7108,
"step": 9220
},
{
"epoch": 1.99,
"learning_rate": 0.00010144150670047083,
"loss": 0.6952,
"step": 9240
},
{
"epoch": 2.0,
"learning_rate": 0.00010100688156465048,
"loss": 0.7013,
"step": 9260
},
{
"epoch": 2.0,
"learning_rate": 0.00010057225642883013,
"loss": 0.7013,
"step": 9280
},
{
"epoch": 2.01,
"learning_rate": 0.00010013763129300976,
"loss": 0.7049,
"step": 9300
},
{
"epoch": 2.01,
"learning_rate": 9.970300615718941e-05,
"loss": 0.7093,
"step": 9320
},
{
"epoch": 2.02,
"learning_rate": 9.926838102136906e-05,
"loss": 0.713,
"step": 9340
},
{
"epoch": 2.02,
"learning_rate": 9.88337558855487e-05,
"loss": 0.7108,
"step": 9360
},
{
"epoch": 2.02,
"learning_rate": 9.839913074972835e-05,
"loss": 0.7115,
"step": 9380
},
{
"epoch": 2.03,
"learning_rate": 9.7964505613908e-05,
"loss": 0.7119,
"step": 9400
},
{
"epoch": 2.03,
"eval_loss": 0.7202969789505005,
"eval_runtime": 25.504,
"eval_samples_per_second": 78.419,
"eval_steps_per_second": 1.255,
"step": 9400
},
{
"epoch": 2.03,
"learning_rate": 9.752988047808764e-05,
"loss": 0.7107,
"step": 9420
},
{
"epoch": 2.04,
"learning_rate": 9.709525534226729e-05,
"loss": 0.7065,
"step": 9440
},
{
"epoch": 2.04,
"learning_rate": 9.666063020644694e-05,
"loss": 0.7121,
"step": 9460
},
{
"epoch": 2.05,
"learning_rate": 9.622600507062657e-05,
"loss": 0.7163,
"step": 9480
},
{
"epoch": 2.05,
"learning_rate": 9.579137993480622e-05,
"loss": 0.7026,
"step": 9500
},
{
"epoch": 2.05,
"learning_rate": 9.535675479898587e-05,
"loss": 0.7158,
"step": 9520
},
{
"epoch": 2.06,
"learning_rate": 9.492212966316551e-05,
"loss": 0.7016,
"step": 9540
},
{
"epoch": 2.06,
"learning_rate": 9.448750452734516e-05,
"loss": 0.7149,
"step": 9560
},
{
"epoch": 2.07,
"learning_rate": 9.405287939152481e-05,
"loss": 0.7079,
"step": 9580
},
{
"epoch": 2.07,
"learning_rate": 9.361825425570445e-05,
"loss": 0.709,
"step": 9600
},
{
"epoch": 2.07,
"eval_loss": 0.7194134593009949,
"eval_runtime": 25.5286,
"eval_samples_per_second": 78.343,
"eval_steps_per_second": 1.253,
"step": 9600
},
{
"epoch": 2.08,
"learning_rate": 9.31836291198841e-05,
"loss": 0.7127,
"step": 9620
},
{
"epoch": 2.08,
"learning_rate": 9.274900398406375e-05,
"loss": 0.7037,
"step": 9640
},
{
"epoch": 2.08,
"learning_rate": 9.231437884824338e-05,
"loss": 0.7114,
"step": 9660
},
{
"epoch": 2.09,
"learning_rate": 9.187975371242303e-05,
"loss": 0.706,
"step": 9680
},
{
"epoch": 2.09,
"learning_rate": 9.144512857660268e-05,
"loss": 0.7026,
"step": 9700
},
{
"epoch": 2.1,
"learning_rate": 9.101050344078232e-05,
"loss": 0.7079,
"step": 9720
},
{
"epoch": 2.1,
"learning_rate": 9.057587830496197e-05,
"loss": 0.7053,
"step": 9740
},
{
"epoch": 2.11,
"learning_rate": 9.014125316914162e-05,
"loss": 0.7125,
"step": 9760
},
{
"epoch": 2.11,
"learning_rate": 8.970662803332126e-05,
"loss": 0.7045,
"step": 9780
},
{
"epoch": 2.11,
"learning_rate": 8.92720028975009e-05,
"loss": 0.7109,
"step": 9800
},
{
"epoch": 2.11,
"eval_loss": 0.7186465859413147,
"eval_runtime": 25.5049,
"eval_samples_per_second": 78.416,
"eval_steps_per_second": 1.255,
"step": 9800
},
{
"epoch": 2.12,
"learning_rate": 8.883737776168056e-05,
"loss": 0.7035,
"step": 9820
},
{
"epoch": 2.12,
"learning_rate": 8.840275262586019e-05,
"loss": 0.7073,
"step": 9840
},
{
"epoch": 2.13,
"learning_rate": 8.796812749003983e-05,
"loss": 0.7114,
"step": 9860
},
{
"epoch": 2.13,
"learning_rate": 8.753350235421946e-05,
"loss": 0.7066,
"step": 9880
},
{
"epoch": 2.14,
"learning_rate": 8.709887721839911e-05,
"loss": 0.7055,
"step": 9900
},
{
"epoch": 2.14,
"learning_rate": 8.666425208257877e-05,
"loss": 0.7064,
"step": 9920
},
{
"epoch": 2.14,
"learning_rate": 8.62296269467584e-05,
"loss": 0.7154,
"step": 9940
},
{
"epoch": 2.15,
"learning_rate": 8.579500181093805e-05,
"loss": 0.7099,
"step": 9960
},
{
"epoch": 2.15,
"learning_rate": 8.53603766751177e-05,
"loss": 0.7112,
"step": 9980
},
{
"epoch": 2.16,
"learning_rate": 8.492575153929734e-05,
"loss": 0.7086,
"step": 10000
},
{
"epoch": 2.16,
"eval_loss": 0.7181739211082458,
"eval_runtime": 25.5087,
"eval_samples_per_second": 78.405,
"eval_steps_per_second": 1.254,
"step": 10000
},
{
"epoch": 2.16,
"learning_rate": 8.449112640347699e-05,
"loss": 0.7155,
"step": 10020
},
{
"epoch": 2.17,
"learning_rate": 8.405650126765664e-05,
"loss": 0.7097,
"step": 10040
},
{
"epoch": 2.17,
"learning_rate": 8.362187613183627e-05,
"loss": 0.7025,
"step": 10060
},
{
"epoch": 2.17,
"learning_rate": 8.318725099601592e-05,
"loss": 0.7065,
"step": 10080
},
{
"epoch": 2.18,
"learning_rate": 8.275262586019557e-05,
"loss": 0.6982,
"step": 10100
},
{
"epoch": 2.18,
"learning_rate": 8.231800072437521e-05,
"loss": 0.7039,
"step": 10120
},
{
"epoch": 2.19,
"learning_rate": 8.188337558855486e-05,
"loss": 0.7097,
"step": 10140
},
{
"epoch": 2.19,
"learning_rate": 8.144875045273451e-05,
"loss": 0.7089,
"step": 10160
},
{
"epoch": 2.2,
"learning_rate": 8.101412531691415e-05,
"loss": 0.7018,
"step": 10180
},
{
"epoch": 2.2,
"learning_rate": 8.05795001810938e-05,
"loss": 0.7025,
"step": 10200
},
{
"epoch": 2.2,
"eval_loss": 0.7179592251777649,
"eval_runtime": 25.4993,
"eval_samples_per_second": 78.433,
"eval_steps_per_second": 1.255,
"step": 10200
},
{
"epoch": 2.2,
"learning_rate": 8.014487504527345e-05,
"loss": 0.7067,
"step": 10220
},
{
"epoch": 2.21,
"learning_rate": 7.971024990945308e-05,
"loss": 0.71,
"step": 10240
},
{
"epoch": 2.21,
"learning_rate": 7.927562477363273e-05,
"loss": 0.7255,
"step": 10260
},
{
"epoch": 2.22,
"learning_rate": 7.884099963781238e-05,
"loss": 0.7065,
"step": 10280
},
{
"epoch": 2.22,
"learning_rate": 7.840637450199202e-05,
"loss": 0.712,
"step": 10300
},
{
"epoch": 2.23,
"learning_rate": 7.797174936617167e-05,
"loss": 0.7132,
"step": 10320
},
{
"epoch": 2.23,
"learning_rate": 7.753712423035132e-05,
"loss": 0.7106,
"step": 10340
},
{
"epoch": 2.24,
"learning_rate": 7.710249909453096e-05,
"loss": 0.708,
"step": 10360
},
{
"epoch": 2.24,
"learning_rate": 7.666787395871061e-05,
"loss": 0.7054,
"step": 10380
},
{
"epoch": 2.24,
"learning_rate": 7.623324882289026e-05,
"loss": 0.7087,
"step": 10400
},
{
"epoch": 2.24,
"eval_loss": 0.717901349067688,
"eval_runtime": 25.4862,
"eval_samples_per_second": 78.474,
"eval_steps_per_second": 1.256,
"step": 10400
},
{
"epoch": 2.25,
"learning_rate": 7.57986236870699e-05,
"loss": 0.7014,
"step": 10420
},
{
"epoch": 2.25,
"learning_rate": 7.536399855124954e-05,
"loss": 0.7103,
"step": 10440
},
{
"epoch": 2.26,
"learning_rate": 7.49293734154292e-05,
"loss": 0.7089,
"step": 10460
},
{
"epoch": 2.26,
"learning_rate": 7.449474827960883e-05,
"loss": 0.704,
"step": 10480
},
{
"epoch": 2.27,
"learning_rate": 7.406012314378847e-05,
"loss": 0.7074,
"step": 10500
},
{
"epoch": 2.27,
"learning_rate": 7.362549800796812e-05,
"loss": 0.7094,
"step": 10520
},
{
"epoch": 2.27,
"learning_rate": 7.319087287214777e-05,
"loss": 0.7069,
"step": 10540
},
{
"epoch": 2.28,
"learning_rate": 7.27562477363274e-05,
"loss": 0.7081,
"step": 10560
},
{
"epoch": 2.28,
"learning_rate": 7.232162260050705e-05,
"loss": 0.7036,
"step": 10580
},
{
"epoch": 2.29,
"learning_rate": 7.18869974646867e-05,
"loss": 0.6984,
"step": 10600
},
{
"epoch": 2.29,
"eval_loss": 0.7175166010856628,
"eval_runtime": 25.5016,
"eval_samples_per_second": 78.426,
"eval_steps_per_second": 1.255,
"step": 10600
},
{
"epoch": 2.29,
"learning_rate": 7.145237232886634e-05,
"loss": 0.7097,
"step": 10620
},
{
"epoch": 2.3,
"learning_rate": 7.101774719304599e-05,
"loss": 0.7143,
"step": 10640
},
{
"epoch": 2.3,
"learning_rate": 7.058312205722564e-05,
"loss": 0.7099,
"step": 10660
},
{
"epoch": 2.3,
"learning_rate": 7.014849692140528e-05,
"loss": 0.6994,
"step": 10680
},
{
"epoch": 2.31,
"learning_rate": 6.971387178558493e-05,
"loss": 0.7129,
"step": 10700
},
{
"epoch": 2.31,
"learning_rate": 6.927924664976458e-05,
"loss": 0.7067,
"step": 10720
},
{
"epoch": 2.32,
"learning_rate": 6.884462151394421e-05,
"loss": 0.7044,
"step": 10740
},
{
"epoch": 2.32,
"learning_rate": 6.840999637812386e-05,
"loss": 0.7092,
"step": 10760
},
{
"epoch": 2.33,
"learning_rate": 6.797537124230351e-05,
"loss": 0.7075,
"step": 10780
},
{
"epoch": 2.33,
"learning_rate": 6.754074610648315e-05,
"loss": 0.7073,
"step": 10800
},
{
"epoch": 2.33,
"eval_loss": 0.7168901562690735,
"eval_runtime": 25.5153,
"eval_samples_per_second": 78.384,
"eval_steps_per_second": 1.254,
"step": 10800
},
{
"epoch": 2.33,
"learning_rate": 6.71061209706628e-05,
"loss": 0.7088,
"step": 10820
},
{
"epoch": 2.34,
"learning_rate": 6.667149583484245e-05,
"loss": 0.7046,
"step": 10840
},
{
"epoch": 2.34,
"learning_rate": 6.623687069902209e-05,
"loss": 0.7029,
"step": 10860
},
{
"epoch": 2.35,
"learning_rate": 6.580224556320174e-05,
"loss": 0.7055,
"step": 10880
},
{
"epoch": 2.35,
"learning_rate": 6.536762042738139e-05,
"loss": 0.7095,
"step": 10900
},
{
"epoch": 2.36,
"learning_rate": 6.493299529156102e-05,
"loss": 0.7057,
"step": 10920
},
{
"epoch": 2.36,
"learning_rate": 6.449837015574066e-05,
"loss": 0.7064,
"step": 10940
},
{
"epoch": 2.36,
"learning_rate": 6.406374501992031e-05,
"loss": 0.7039,
"step": 10960
},
{
"epoch": 2.37,
"learning_rate": 6.362911988409996e-05,
"loss": 0.7109,
"step": 10980
},
{
"epoch": 2.37,
"learning_rate": 6.31944947482796e-05,
"loss": 0.7051,
"step": 11000
},
{
"epoch": 2.37,
"eval_loss": 0.7164381146430969,
"eval_runtime": 25.4817,
"eval_samples_per_second": 78.488,
"eval_steps_per_second": 1.256,
"step": 11000
},
{
"epoch": 2.38,
"learning_rate": 6.275986961245924e-05,
"loss": 0.7117,
"step": 11020
},
{
"epoch": 2.38,
"learning_rate": 6.23252444766389e-05,
"loss": 0.6972,
"step": 11040
},
{
"epoch": 2.39,
"learning_rate": 6.189061934081853e-05,
"loss": 0.7087,
"step": 11060
},
{
"epoch": 2.39,
"learning_rate": 6.145599420499818e-05,
"loss": 0.703,
"step": 11080
},
{
"epoch": 2.39,
"learning_rate": 6.1021369069177825e-05,
"loss": 0.7062,
"step": 11100
},
{
"epoch": 2.4,
"learning_rate": 6.0586743933357475e-05,
"loss": 0.7018,
"step": 11120
},
{
"epoch": 2.4,
"learning_rate": 6.015211879753712e-05,
"loss": 0.7003,
"step": 11140
},
{
"epoch": 2.41,
"learning_rate": 5.971749366171676e-05,
"loss": 0.7005,
"step": 11160
},
{
"epoch": 2.41,
"learning_rate": 5.928286852589641e-05,
"loss": 0.7099,
"step": 11180
},
{
"epoch": 2.42,
"learning_rate": 5.8848243390076054e-05,
"loss": 0.7002,
"step": 11200
},
{
"epoch": 2.42,
"eval_loss": 0.7161288857460022,
"eval_runtime": 25.5084,
"eval_samples_per_second": 78.406,
"eval_steps_per_second": 1.254,
"step": 11200
},
{
"epoch": 2.42,
"learning_rate": 5.84136182542557e-05,
"loss": 0.7071,
"step": 11220
},
{
"epoch": 2.43,
"learning_rate": 5.797899311843535e-05,
"loss": 0.7028,
"step": 11240
},
{
"epoch": 2.43,
"learning_rate": 5.754436798261499e-05,
"loss": 0.7199,
"step": 11260
},
{
"epoch": 2.43,
"learning_rate": 5.7109742846794634e-05,
"loss": 0.6974,
"step": 11280
},
{
"epoch": 2.44,
"learning_rate": 5.6675117710974284e-05,
"loss": 0.7003,
"step": 11300
},
{
"epoch": 2.44,
"learning_rate": 5.624049257515393e-05,
"loss": 0.7079,
"step": 11320
},
{
"epoch": 2.45,
"learning_rate": 5.580586743933357e-05,
"loss": 0.6988,
"step": 11340
},
{
"epoch": 2.45,
"learning_rate": 5.537124230351322e-05,
"loss": 0.7047,
"step": 11360
},
{
"epoch": 2.46,
"learning_rate": 5.493661716769286e-05,
"loss": 0.6946,
"step": 11380
},
{
"epoch": 2.46,
"learning_rate": 5.45019920318725e-05,
"loss": 0.7096,
"step": 11400
},
{
"epoch": 2.46,
"eval_loss": 0.7155815958976746,
"eval_runtime": 25.525,
"eval_samples_per_second": 78.355,
"eval_steps_per_second": 1.254,
"step": 11400
},
{
"epoch": 2.46,
"learning_rate": 5.406736689605215e-05,
"loss": 0.709,
"step": 11420
},
{
"epoch": 2.47,
"learning_rate": 5.3632741760231794e-05,
"loss": 0.7112,
"step": 11440
},
{
"epoch": 2.47,
"learning_rate": 5.319811662441144e-05,
"loss": 0.6983,
"step": 11460
},
{
"epoch": 2.48,
"learning_rate": 5.276349148859109e-05,
"loss": 0.7,
"step": 11480
},
{
"epoch": 2.48,
"learning_rate": 5.232886635277073e-05,
"loss": 0.7006,
"step": 11500
},
{
"epoch": 2.49,
"learning_rate": 5.189424121695037e-05,
"loss": 0.7068,
"step": 11520
},
{
"epoch": 2.49,
"learning_rate": 5.1459616081130023e-05,
"loss": 0.7012,
"step": 11540
},
{
"epoch": 2.49,
"learning_rate": 5.102499094530967e-05,
"loss": 0.7079,
"step": 11560
},
{
"epoch": 2.5,
"learning_rate": 5.059036580948931e-05,
"loss": 0.7031,
"step": 11580
},
{
"epoch": 2.5,
"learning_rate": 5.015574067366896e-05,
"loss": 0.7038,
"step": 11600
},
{
"epoch": 2.5,
"eval_loss": 0.7149330973625183,
"eval_runtime": 25.4843,
"eval_samples_per_second": 78.48,
"eval_steps_per_second": 1.256,
"step": 11600
},
{
"epoch": 2.51,
"learning_rate": 4.97211155378486e-05,
"loss": 0.6972,
"step": 11620
},
{
"epoch": 2.51,
"learning_rate": 4.9286490402028246e-05,
"loss": 0.7039,
"step": 11640
},
{
"epoch": 2.52,
"learning_rate": 4.885186526620789e-05,
"loss": 0.7052,
"step": 11660
},
{
"epoch": 2.52,
"learning_rate": 4.841724013038754e-05,
"loss": 0.7045,
"step": 11680
},
{
"epoch": 2.52,
"learning_rate": 4.798261499456718e-05,
"loss": 0.701,
"step": 11700
},
{
"epoch": 2.53,
"learning_rate": 4.7547989858746826e-05,
"loss": 0.7084,
"step": 11720
},
{
"epoch": 2.53,
"learning_rate": 4.7113364722926476e-05,
"loss": 0.6988,
"step": 11740
},
{
"epoch": 2.54,
"learning_rate": 4.667873958710612e-05,
"loss": 0.7155,
"step": 11760
},
{
"epoch": 2.54,
"learning_rate": 4.624411445128576e-05,
"loss": 0.7044,
"step": 11780
},
{
"epoch": 2.55,
"learning_rate": 4.5809489315465406e-05,
"loss": 0.7014,
"step": 11800
},
{
"epoch": 2.55,
"eval_loss": 0.714367151260376,
"eval_runtime": 25.4959,
"eval_samples_per_second": 78.444,
"eval_steps_per_second": 1.255,
"step": 11800
},
{
"epoch": 2.55,
"learning_rate": 4.537486417964505e-05,
"loss": 0.708,
"step": 11820
},
{
"epoch": 2.55,
"learning_rate": 4.494023904382469e-05,
"loss": 0.6976,
"step": 11840
},
{
"epoch": 2.56,
"learning_rate": 4.450561390800434e-05,
"loss": 0.7057,
"step": 11860
},
{
"epoch": 2.56,
"learning_rate": 4.4070988772183986e-05,
"loss": 0.7039,
"step": 11880
},
{
"epoch": 2.57,
"learning_rate": 4.363636363636363e-05,
"loss": 0.7089,
"step": 11900
},
{
"epoch": 2.57,
"learning_rate": 4.320173850054328e-05,
"loss": 0.7026,
"step": 11920
},
{
"epoch": 2.58,
"learning_rate": 4.276711336472292e-05,
"loss": 0.7023,
"step": 11940
},
{
"epoch": 2.58,
"learning_rate": 4.2332488228902565e-05,
"loss": 0.7006,
"step": 11960
},
{
"epoch": 2.58,
"learning_rate": 4.1897863093082215e-05,
"loss": 0.7008,
"step": 11980
},
{
"epoch": 2.59,
"learning_rate": 4.146323795726186e-05,
"loss": 0.7057,
"step": 12000
},
{
"epoch": 2.59,
"eval_loss": 0.7141902446746826,
"eval_runtime": 25.5019,
"eval_samples_per_second": 78.426,
"eval_steps_per_second": 1.255,
"step": 12000
},
{
"epoch": 2.59,
"learning_rate": 4.10286128214415e-05,
"loss": 0.7083,
"step": 12020
},
{
"epoch": 2.6,
"learning_rate": 4.059398768562115e-05,
"loss": 0.6986,
"step": 12040
},
{
"epoch": 2.6,
"learning_rate": 4.0159362549800795e-05,
"loss": 0.7076,
"step": 12060
},
{
"epoch": 2.61,
"learning_rate": 3.972473741398044e-05,
"loss": 0.7071,
"step": 12080
},
{
"epoch": 2.61,
"learning_rate": 3.929011227816009e-05,
"loss": 0.6984,
"step": 12100
},
{
"epoch": 2.61,
"learning_rate": 3.885548714233973e-05,
"loss": 0.7096,
"step": 12120
},
{
"epoch": 2.62,
"learning_rate": 3.8420862006519375e-05,
"loss": 0.7027,
"step": 12140
},
{
"epoch": 2.62,
"learning_rate": 3.7986236870699025e-05,
"loss": 0.7062,
"step": 12160
},
{
"epoch": 2.63,
"learning_rate": 3.755161173487867e-05,
"loss": 0.7049,
"step": 12180
},
{
"epoch": 2.63,
"learning_rate": 3.711698659905831e-05,
"loss": 0.7052,
"step": 12200
},
{
"epoch": 2.63,
"eval_loss": 0.7140177488327026,
"eval_runtime": 25.4673,
"eval_samples_per_second": 78.532,
"eval_steps_per_second": 1.257,
"step": 12200
},
{
"epoch": 2.64,
"learning_rate": 3.6682361463237955e-05,
"loss": 0.7011,
"step": 12220
},
{
"epoch": 2.64,
"learning_rate": 3.62477363274176e-05,
"loss": 0.7025,
"step": 12240
},
{
"epoch": 2.65,
"learning_rate": 3.581311119159725e-05,
"loss": 0.7006,
"step": 12260
},
{
"epoch": 2.65,
"learning_rate": 3.537848605577689e-05,
"loss": 0.7073,
"step": 12280
},
{
"epoch": 2.65,
"learning_rate": 3.4943860919956534e-05,
"loss": 0.7033,
"step": 12300
},
{
"epoch": 2.66,
"learning_rate": 3.4509235784136184e-05,
"loss": 0.6992,
"step": 12320
},
{
"epoch": 2.66,
"learning_rate": 3.407461064831582e-05,
"loss": 0.7043,
"step": 12340
},
{
"epoch": 2.67,
"learning_rate": 3.363998551249547e-05,
"loss": 0.7083,
"step": 12360
},
{
"epoch": 2.67,
"learning_rate": 3.3205360376675114e-05,
"loss": 0.7086,
"step": 12380
},
{
"epoch": 2.68,
"learning_rate": 3.277073524085476e-05,
"loss": 0.7168,
"step": 12400
},
{
"epoch": 2.68,
"eval_loss": 0.7138265371322632,
"eval_runtime": 25.5077,
"eval_samples_per_second": 78.408,
"eval_steps_per_second": 1.255,
"step": 12400
},
{
"epoch": 2.68,
"learning_rate": 3.233611010503441e-05,
"loss": 0.7026,
"step": 12420
},
{
"epoch": 2.68,
"learning_rate": 3.190148496921405e-05,
"loss": 0.7097,
"step": 12440
},
{
"epoch": 2.69,
"learning_rate": 3.1466859833393694e-05,
"loss": 0.7094,
"step": 12460
},
{
"epoch": 2.69,
"learning_rate": 3.1032234697573344e-05,
"loss": 0.6971,
"step": 12480
},
{
"epoch": 2.7,
"learning_rate": 3.059760956175299e-05,
"loss": 0.6977,
"step": 12500
},
{
"epoch": 2.7,
"learning_rate": 3.016298442593263e-05,
"loss": 0.6945,
"step": 12520
},
{
"epoch": 2.71,
"learning_rate": 2.9728359290112277e-05,
"loss": 0.6998,
"step": 12540
},
{
"epoch": 2.71,
"learning_rate": 2.929373415429192e-05,
"loss": 0.7067,
"step": 12560
},
{
"epoch": 2.71,
"learning_rate": 2.8859109018471563e-05,
"loss": 0.6935,
"step": 12580
},
{
"epoch": 2.72,
"learning_rate": 2.842448388265121e-05,
"loss": 0.6927,
"step": 12600
},
{
"epoch": 2.72,
"eval_loss": 0.7132371664047241,
"eval_runtime": 25.516,
"eval_samples_per_second": 78.382,
"eval_steps_per_second": 1.254,
"step": 12600
},
{
"epoch": 2.72,
"learning_rate": 2.7989858746830857e-05,
"loss": 0.7025,
"step": 12620
},
{
"epoch": 2.73,
"learning_rate": 2.75552336110105e-05,
"loss": 0.7098,
"step": 12640
},
{
"epoch": 2.73,
"learning_rate": 2.7120608475190147e-05,
"loss": 0.6939,
"step": 12660
},
{
"epoch": 2.74,
"learning_rate": 2.6685983339369793e-05,
"loss": 0.7038,
"step": 12680
},
{
"epoch": 2.74,
"learning_rate": 2.6251358203549436e-05,
"loss": 0.7039,
"step": 12700
},
{
"epoch": 2.74,
"learning_rate": 2.5816733067729083e-05,
"loss": 0.7018,
"step": 12720
},
{
"epoch": 2.75,
"learning_rate": 2.538210793190873e-05,
"loss": 0.6943,
"step": 12740
},
{
"epoch": 2.75,
"learning_rate": 2.4947482796088373e-05,
"loss": 0.7007,
"step": 12760
},
{
"epoch": 2.76,
"learning_rate": 2.4512857660268016e-05,
"loss": 0.7019,
"step": 12780
},
{
"epoch": 2.76,
"learning_rate": 2.407823252444766e-05,
"loss": 0.6957,
"step": 12800
},
{
"epoch": 2.76,
"eval_loss": 0.7126932144165039,
"eval_runtime": 25.4915,
"eval_samples_per_second": 78.458,
"eval_steps_per_second": 1.255,
"step": 12800
},
{
"epoch": 2.77,
"learning_rate": 2.3643607388627306e-05,
"loss": 0.6993,
"step": 12820
},
{
"epoch": 2.77,
"learning_rate": 2.3208982252806953e-05,
"loss": 0.6951,
"step": 12840
},
{
"epoch": 2.77,
"learning_rate": 2.2774357116986596e-05,
"loss": 0.7056,
"step": 12860
},
{
"epoch": 2.78,
"learning_rate": 2.2339731981166243e-05,
"loss": 0.7153,
"step": 12880
},
{
"epoch": 2.78,
"learning_rate": 2.190510684534589e-05,
"loss": 0.7022,
"step": 12900
},
{
"epoch": 2.79,
"learning_rate": 2.1470481709525532e-05,
"loss": 0.7078,
"step": 12920
},
{
"epoch": 2.79,
"learning_rate": 2.103585657370518e-05,
"loss": 0.6969,
"step": 12940
},
{
"epoch": 2.8,
"learning_rate": 2.0601231437884826e-05,
"loss": 0.7056,
"step": 12960
},
{
"epoch": 2.8,
"learning_rate": 2.016660630206447e-05,
"loss": 0.6975,
"step": 12980
},
{
"epoch": 2.8,
"learning_rate": 1.9731981166244112e-05,
"loss": 0.7065,
"step": 13000
},
{
"epoch": 2.8,
"eval_loss": 0.7130131721496582,
"eval_runtime": 25.4905,
"eval_samples_per_second": 78.461,
"eval_steps_per_second": 1.255,
"step": 13000
},
{
"epoch": 2.81,
"learning_rate": 1.9297356030423755e-05,
"loss": 0.7,
"step": 13020
},
{
"epoch": 2.81,
"learning_rate": 1.8862730894603402e-05,
"loss": 0.7144,
"step": 13040
},
{
"epoch": 2.82,
"learning_rate": 1.842810575878305e-05,
"loss": 0.6964,
"step": 13060
},
{
"epoch": 2.82,
"learning_rate": 1.7993480622962692e-05,
"loss": 0.6981,
"step": 13080
},
{
"epoch": 2.83,
"learning_rate": 1.755885548714234e-05,
"loss": 0.7102,
"step": 13100
},
{
"epoch": 2.83,
"learning_rate": 1.7124230351321985e-05,
"loss": 0.6975,
"step": 13120
},
{
"epoch": 2.83,
"learning_rate": 1.668960521550163e-05,
"loss": 0.7062,
"step": 13140
},
{
"epoch": 2.84,
"learning_rate": 1.625498007968127e-05,
"loss": 0.6956,
"step": 13160
},
{
"epoch": 2.84,
"learning_rate": 1.5820354943860918e-05,
"loss": 0.71,
"step": 13180
},
{
"epoch": 2.85,
"learning_rate": 1.5385729808040565e-05,
"loss": 0.7081,
"step": 13200
},
{
"epoch": 2.85,
"eval_loss": 0.7126001119613647,
"eval_runtime": 25.5102,
"eval_samples_per_second": 78.4,
"eval_steps_per_second": 1.254,
"step": 13200
},
{
"epoch": 2.85,
"learning_rate": 1.495110467222021e-05,
"loss": 0.6977,
"step": 13220
},
{
"epoch": 2.86,
"learning_rate": 1.4516479536399855e-05,
"loss": 0.705,
"step": 13240
},
{
"epoch": 2.86,
"learning_rate": 1.4081854400579498e-05,
"loss": 0.7016,
"step": 13260
},
{
"epoch": 2.87,
"learning_rate": 1.3647229264759143e-05,
"loss": 0.6922,
"step": 13280
},
{
"epoch": 2.87,
"learning_rate": 1.321260412893879e-05,
"loss": 0.6987,
"step": 13300
},
{
"epoch": 2.87,
"learning_rate": 1.2777978993118434e-05,
"loss": 0.7041,
"step": 13320
},
{
"epoch": 2.88,
"learning_rate": 1.234335385729808e-05,
"loss": 0.7101,
"step": 13340
},
{
"epoch": 2.88,
"learning_rate": 1.1908728721477723e-05,
"loss": 0.6976,
"step": 13360
},
{
"epoch": 2.89,
"learning_rate": 1.147410358565737e-05,
"loss": 0.7011,
"step": 13380
},
{
"epoch": 2.89,
"learning_rate": 1.1039478449837014e-05,
"loss": 0.6973,
"step": 13400
},
{
"epoch": 2.89,
"eval_loss": 0.7123447060585022,
"eval_runtime": 25.5029,
"eval_samples_per_second": 78.422,
"eval_steps_per_second": 1.255,
"step": 13400
}
],
"max_steps": 13905,
"num_train_epochs": 3,
"total_flos": 6.775116663531084e+19,
"trial_name": null,
"trial_params": null
}