Mel-Iza0's picture
Training in progress, epoch 0
d21e52a
raw
history blame
42.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.24560541735377706,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.1428571428571429e-05,
"loss": 1.3156,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 2.2857142857142858e-05,
"loss": 1.2717,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 3.428571428571429e-05,
"loss": 1.1819,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 4.5714285714285716e-05,
"loss": 1.038,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 5.714285714285714e-05,
"loss": 0.9192,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 6.857142857142858e-05,
"loss": 0.8204,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 8e-05,
"loss": 0.7139,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 9.142857142857143e-05,
"loss": 0.6191,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 0.00010285714285714286,
"loss": 0.559,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 0.00011428571428571428,
"loss": 0.5244,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 0.00012571428571428572,
"loss": 0.5149,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 0.00013714285714285716,
"loss": 0.4988,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 0.00014857142857142857,
"loss": 0.475,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 0.00016,
"loss": 0.4527,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 0.00017142857142857143,
"loss": 0.4563,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 0.00018285714285714286,
"loss": 0.4443,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 0.0001942857142857143,
"loss": 0.4247,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 0.00020571428571428572,
"loss": 0.4369,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 0.00021714285714285713,
"loss": 0.4371,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 0.00022857142857142857,
"loss": 0.42,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 0.00024,
"loss": 0.4221,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 0.00025142857142857145,
"loss": 0.4162,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 0.00026285714285714286,
"loss": 0.4065,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 0.0002742857142857143,
"loss": 0.4094,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 0.00028571428571428574,
"loss": 0.4017,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 0.00029714285714285715,
"loss": 0.4291,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 0.0003085714285714286,
"loss": 0.4198,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 0.00032,
"loss": 0.4069,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 0.0003314285714285715,
"loss": 0.4118,
"step": 290
},
{
"epoch": 0.02,
"learning_rate": 0.00034285714285714285,
"loss": 0.3921,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 0.00035428571428571426,
"loss": 0.3911,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 0.00036571428571428573,
"loss": 0.4013,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 0.00037714285714285714,
"loss": 0.4114,
"step": 330
},
{
"epoch": 0.02,
"learning_rate": 0.0003885714285714286,
"loss": 0.4098,
"step": 340
},
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 0.4114,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 0.00039999005339118863,
"loss": 0.3901,
"step": 360
},
{
"epoch": 0.03,
"learning_rate": 0.0003999602145541047,
"loss": 0.405,
"step": 370
},
{
"epoch": 0.03,
"learning_rate": 0.00039991048645670067,
"loss": 0.3946,
"step": 380
},
{
"epoch": 0.03,
"learning_rate": 0.00039984087404523587,
"loss": 0.398,
"step": 390
},
{
"epoch": 0.03,
"learning_rate": 0.00039975138424378447,
"loss": 0.3979,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 0.000399642025953547,
"loss": 0.3996,
"step": 410
},
{
"epoch": 0.03,
"learning_rate": 0.0003995128100519649,
"loss": 0.3935,
"step": 420
},
{
"epoch": 0.03,
"learning_rate": 0.0003993637493916382,
"loss": 0.4008,
"step": 430
},
{
"epoch": 0.03,
"learning_rate": 0.00039919485879904784,
"loss": 0.3875,
"step": 440
},
{
"epoch": 0.03,
"learning_rate": 0.00039900615507308033,
"loss": 0.3918,
"step": 450
},
{
"epoch": 0.03,
"learning_rate": 0.00039879765698335704,
"loss": 0.3907,
"step": 460
},
{
"epoch": 0.03,
"learning_rate": 0.0003985693852683675,
"loss": 0.3899,
"step": 470
},
{
"epoch": 0.03,
"learning_rate": 0.00039832136263340603,
"loss": 0.3916,
"step": 480
},
{
"epoch": 0.03,
"learning_rate": 0.0003980536137483141,
"loss": 0.3971,
"step": 490
},
{
"epoch": 0.04,
"learning_rate": 0.0003977661652450257,
"loss": 0.3915,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 0.00039745904571491917,
"loss": 0.3838,
"step": 510
},
{
"epoch": 0.04,
"learning_rate": 0.0003971322857059726,
"loss": 0.378,
"step": 520
},
{
"epoch": 0.04,
"learning_rate": 0.0003967859177197259,
"loss": 0.3795,
"step": 530
},
{
"epoch": 0.04,
"learning_rate": 0.0003964199762080478,
"loss": 0.3697,
"step": 540
},
{
"epoch": 0.04,
"learning_rate": 0.00039603449756970877,
"loss": 0.3785,
"step": 550
},
{
"epoch": 0.04,
"learning_rate": 0.00039562952014676116,
"loss": 0.3868,
"step": 560
},
{
"epoch": 0.04,
"learning_rate": 0.0003952050842207249,
"loss": 0.3938,
"step": 570
},
{
"epoch": 0.04,
"learning_rate": 0.0003947612320085813,
"loss": 0.385,
"step": 580
},
{
"epoch": 0.04,
"learning_rate": 0.0003942980076585735,
"loss": 0.3884,
"step": 590
},
{
"epoch": 0.04,
"learning_rate": 0.0003938154572458156,
"loss": 0.3835,
"step": 600
},
{
"epoch": 0.04,
"learning_rate": 0.00039331362876770945,
"loss": 0.3884,
"step": 610
},
{
"epoch": 0.04,
"learning_rate": 0.00039279257213917066,
"loss": 0.3873,
"step": 620
},
{
"epoch": 0.04,
"learning_rate": 0.0003922523391876638,
"loss": 0.3824,
"step": 630
},
{
"epoch": 0.04,
"learning_rate": 0.00039169298364804715,
"loss": 0.3853,
"step": 640
},
{
"epoch": 0.05,
"learning_rate": 0.0003911145611572282,
"loss": 0.3783,
"step": 650
},
{
"epoch": 0.05,
"learning_rate": 0.0003905171292486292,
"loss": 0.3806,
"step": 660
},
{
"epoch": 0.05,
"learning_rate": 0.00038990074734646525,
"loss": 0.3794,
"step": 670
},
{
"epoch": 0.05,
"learning_rate": 0.00038926547675983286,
"loss": 0.3883,
"step": 680
},
{
"epoch": 0.05,
"learning_rate": 0.0003886113806766121,
"loss": 0.3764,
"step": 690
},
{
"epoch": 0.05,
"learning_rate": 0.0003879385241571817,
"loss": 0.3872,
"step": 700
},
{
"epoch": 0.05,
"learning_rate": 0.00038724697412794747,
"loss": 0.381,
"step": 710
},
{
"epoch": 0.05,
"learning_rate": 0.00038653679937468557,
"loss": 0.3777,
"step": 720
},
{
"epoch": 0.05,
"learning_rate": 0.0003858080705357005,
"loss": 0.378,
"step": 730
},
{
"epoch": 0.05,
"learning_rate": 0.00038506086009479937,
"loss": 0.3838,
"step": 740
},
{
"epoch": 0.05,
"learning_rate": 0.0003842952423740815,
"loss": 0.3748,
"step": 750
},
{
"epoch": 0.05,
"learning_rate": 0.00038351129352654677,
"loss": 0.3732,
"step": 760
},
{
"epoch": 0.05,
"learning_rate": 0.0003827090915285202,
"loss": 0.3677,
"step": 770
},
{
"epoch": 0.05,
"learning_rate": 0.00038188871617189646,
"loss": 0.3635,
"step": 780
},
{
"epoch": 0.06,
"learning_rate": 0.000381050249056203,
"loss": 0.369,
"step": 790
},
{
"epoch": 0.06,
"learning_rate": 0.0003801937735804838,
"loss": 0.3823,
"step": 800
},
{
"epoch": 0.06,
"learning_rate": 0.0003793193749350042,
"loss": 0.385,
"step": 810
},
{
"epoch": 0.06,
"learning_rate": 0.0003784271400927767,
"loss": 0.3785,
"step": 820
},
{
"epoch": 0.06,
"learning_rate": 0.00037751715780091086,
"loss": 0.3779,
"step": 830
},
{
"epoch": 0.06,
"learning_rate": 0.00037658951857178543,
"loss": 0.3604,
"step": 840
},
{
"epoch": 0.06,
"learning_rate": 0.00037564431467404573,
"loss": 0.3742,
"step": 850
},
{
"epoch": 0.06,
"learning_rate": 0.00037468164012342597,
"loss": 0.365,
"step": 860
},
{
"epoch": 0.06,
"learning_rate": 0.0003737015906733978,
"loss": 0.382,
"step": 870
},
{
"epoch": 0.06,
"learning_rate": 0.0003727042638056462,
"loss": 0.3804,
"step": 880
},
{
"epoch": 0.06,
"learning_rate": 0.00037168975872037323,
"loss": 0.3794,
"step": 890
},
{
"epoch": 0.06,
"learning_rate": 0.00037065817632643116,
"loss": 0.3794,
"step": 900
},
{
"epoch": 0.06,
"learning_rate": 0.0003696096192312852,
"loss": 0.3725,
"step": 910
},
{
"epoch": 0.06,
"learning_rate": 0.0003685441917308078,
"loss": 0.3698,
"step": 920
},
{
"epoch": 0.07,
"learning_rate": 0.0003674619997989047,
"loss": 0.3679,
"step": 930
},
{
"epoch": 0.07,
"learning_rate": 0.0003663631510769738,
"loss": 0.3671,
"step": 940
},
{
"epoch": 0.07,
"learning_rate": 0.000365247754863199,
"loss": 0.3719,
"step": 950
},
{
"epoch": 0.07,
"learning_rate": 0.0003641159221016783,
"loss": 0.3685,
"step": 960
},
{
"epoch": 0.07,
"learning_rate": 0.00036296776537138905,
"loss": 0.3703,
"step": 970
},
{
"epoch": 0.07,
"learning_rate": 0.0003618033988749895,
"loss": 0.3708,
"step": 980
},
{
"epoch": 0.07,
"learning_rate": 0.00036062293842746036,
"loss": 0.3642,
"step": 990
},
{
"epoch": 0.07,
"learning_rate": 0.0003594265014445845,
"loss": 0.3692,
"step": 1000
},
{
"epoch": 0.07,
"learning_rate": 0.0003582142069312683,
"loss": 0.3753,
"step": 1010
},
{
"epoch": 0.07,
"learning_rate": 0.0003569861754697045,
"loss": 0.3673,
"step": 1020
},
{
"epoch": 0.07,
"learning_rate": 0.0003557425292073788,
"loss": 0.3728,
"step": 1030
},
{
"epoch": 0.07,
"learning_rate": 0.0003544833918449199,
"loss": 0.3711,
"step": 1040
},
{
"epoch": 0.07,
"learning_rate": 0.0003532088886237956,
"loss": 0.3657,
"step": 1050
},
{
"epoch": 0.07,
"learning_rate": 0.00035191914631385563,
"loss": 0.3714,
"step": 1060
},
{
"epoch": 0.08,
"learning_rate": 0.00035061429320072223,
"loss": 0.3705,
"step": 1070
},
{
"epoch": 0.08,
"learning_rate": 0.00034929445907303007,
"loss": 0.3672,
"step": 1080
},
{
"epoch": 0.08,
"learning_rate": 0.0003479597752095168,
"loss": 0.3628,
"step": 1090
},
{
"epoch": 0.08,
"learning_rate": 0.0003466103743659653,
"loss": 0.3647,
"step": 1100
},
{
"epoch": 0.08,
"learning_rate": 0.0003452463907619986,
"loss": 0.3699,
"step": 1110
},
{
"epoch": 0.08,
"learning_rate": 0.0003438679600677302,
"loss": 0.3699,
"step": 1120
},
{
"epoch": 0.08,
"learning_rate": 0.000342475219390269,
"loss": 0.3659,
"step": 1130
},
{
"epoch": 0.08,
"learning_rate": 0.0003410683072600818,
"loss": 0.3708,
"step": 1140
},
{
"epoch": 0.08,
"learning_rate": 0.00033964736361721457,
"loss": 0.3667,
"step": 1150
},
{
"epoch": 0.08,
"learning_rate": 0.00033821252979737297,
"loss": 0.3628,
"step": 1160
},
{
"epoch": 0.08,
"learning_rate": 0.00033676394851786404,
"loss": 0.3618,
"step": 1170
},
{
"epoch": 0.08,
"learning_rate": 0.00033530176386340106,
"loss": 0.372,
"step": 1180
},
{
"epoch": 0.08,
"learning_rate": 0.00033382612127177166,
"loss": 0.3637,
"step": 1190
},
{
"epoch": 0.08,
"learning_rate": 0.0003323371675193719,
"loss": 0.3623,
"step": 1200
},
{
"epoch": 0.08,
"learning_rate": 0.0003308350507066069,
"loss": 0.3684,
"step": 1210
},
{
"epoch": 0.09,
"learning_rate": 0.0003293199202431599,
"loss": 0.3648,
"step": 1220
},
{
"epoch": 0.09,
"learning_rate": 0.0003277919268331312,
"loss": 0.3555,
"step": 1230
},
{
"epoch": 0.09,
"learning_rate": 0.0003262512224600478,
"loss": 0.3592,
"step": 1240
},
{
"epoch": 0.09,
"learning_rate": 0.00032469796037174674,
"loss": 0.3587,
"step": 1250
},
{
"epoch": 0.09,
"learning_rate": 0.0003231322950651316,
"loss": 0.3645,
"step": 1260
},
{
"epoch": 0.09,
"learning_rate": 0.00032155438227080604,
"loss": 0.3647,
"step": 1270
},
{
"epoch": 0.09,
"learning_rate": 0.00031996437893758276,
"loss": 0.3563,
"step": 1280
},
{
"epoch": 0.09,
"learning_rate": 0.00031836244321687356,
"loss": 0.3613,
"step": 1290
},
{
"epoch": 0.09,
"learning_rate": 0.000316748734446958,
"loss": 0.3726,
"step": 1300
},
{
"epoch": 0.09,
"learning_rate": 0.00031512341313713474,
"loss": 0.377,
"step": 1310
},
{
"epoch": 0.09,
"learning_rate": 0.0003134866409517564,
"loss": 0.3629,
"step": 1320
},
{
"epoch": 0.09,
"learning_rate": 0.0003118385806941494,
"loss": 0.363,
"step": 1330
},
{
"epoch": 0.09,
"learning_rate": 0.0003101793962904205,
"loss": 0.353,
"step": 1340
},
{
"epoch": 0.09,
"learning_rate": 0.0003085092527731519,
"loss": 0.3537,
"step": 1350
},
{
"epoch": 0.1,
"learning_rate": 0.00030682831626498575,
"loss": 0.3606,
"step": 1360
},
{
"epoch": 0.1,
"learning_rate": 0.00030513675396210094,
"loss": 0.357,
"step": 1370
},
{
"epoch": 0.1,
"learning_rate": 0.00030343473411758253,
"loss": 0.3603,
"step": 1380
},
{
"epoch": 0.1,
"learning_rate": 0.0003017224260246863,
"loss": 0.3618,
"step": 1390
},
{
"epoch": 0.1,
"learning_rate": 0.00030000000000000003,
"loss": 0.3674,
"step": 1400
},
{
"epoch": 0.1,
"learning_rate": 0.0002982676273665023,
"loss": 0.3711,
"step": 1410
},
{
"epoch": 0.1,
"learning_rate": 0.0002965254804365222,
"loss": 0.366,
"step": 1420
},
{
"epoch": 0.1,
"learning_rate": 0.0002947737324945997,
"loss": 0.3727,
"step": 1430
},
{
"epoch": 0.1,
"learning_rate": 0.00029318909447116496,
"loss": 0.372,
"step": 1440
},
{
"epoch": 0.1,
"learning_rate": 0.0002914195854117389,
"loss": 0.3683,
"step": 1450
},
{
"epoch": 0.1,
"learning_rate": 0.0002896409832037748,
"loss": 0.356,
"step": 1460
},
{
"epoch": 0.1,
"learning_rate": 0.0002878534647578768,
"loss": 0.3599,
"step": 1470
},
{
"epoch": 0.1,
"learning_rate": 0.0002860572078715121,
"loss": 0.3605,
"step": 1480
},
{
"epoch": 0.1,
"learning_rate": 0.0002842523912113264,
"loss": 0.3595,
"step": 1490
},
{
"epoch": 0.11,
"learning_rate": 0.00028243919429537265,
"loss": 0.3592,
"step": 1500
},
{
"epoch": 0.11,
"learning_rate": 0.00028061779747525504,
"loss": 0.3582,
"step": 1510
},
{
"epoch": 0.11,
"learning_rate": 0.00027878838191819024,
"loss": 0.3473,
"step": 1520
},
{
"epoch": 0.11,
"learning_rate": 0.0002769511295889872,
"loss": 0.3577,
"step": 1530
},
{
"epoch": 0.11,
"learning_rate": 0.000275106223231948,
"loss": 0.3465,
"step": 1540
},
{
"epoch": 0.11,
"learning_rate": 0.00027325384635269094,
"loss": 0.3547,
"step": 1550
},
{
"epoch": 0.11,
"learning_rate": 0.00027139418319989785,
"loss": 0.3583,
"step": 1560
},
{
"epoch": 0.11,
"learning_rate": 0.0002695274187469878,
"loss": 0.3558,
"step": 1570
},
{
"epoch": 0.11,
"learning_rate": 0.0002676537386737183,
"loss": 0.3547,
"step": 1580
},
{
"epoch": 0.11,
"learning_rate": 0.00026577332934771665,
"loss": 0.3591,
"step": 1590
},
{
"epoch": 0.11,
"learning_rate": 0.0002638863778059426,
"loss": 0.3443,
"step": 1600
},
{
"epoch": 0.11,
"learning_rate": 0.00026199307173608433,
"loss": 0.3481,
"step": 1610
},
{
"epoch": 0.11,
"learning_rate": 0.0002600935994578904,
"loss": 0.349,
"step": 1620
},
{
"epoch": 0.11,
"learning_rate": 0.0002581881499044377,
"loss": 0.3565,
"step": 1630
},
{
"epoch": 0.12,
"learning_rate": 0.0002562769126033394,
"loss": 0.3539,
"step": 1640
},
{
"epoch": 0.12,
"learning_rate": 0.00025436007765789327,
"loss": 0.353,
"step": 1650
},
{
"epoch": 0.12,
"learning_rate": 0.00025243783572817297,
"loss": 0.3612,
"step": 1660
},
{
"epoch": 0.12,
"learning_rate": 0.0002505103780120636,
"loss": 0.3573,
"step": 1670
},
{
"epoch": 0.12,
"learning_rate": 0.0002485778962262443,
"loss": 0.3536,
"step": 1680
},
{
"epoch": 0.12,
"learning_rate": 0.00024664058258711853,
"loss": 0.3632,
"step": 1690
},
{
"epoch": 0.12,
"learning_rate": 0.0002446986297916954,
"loss": 0.3522,
"step": 1700
},
{
"epoch": 0.12,
"learning_rate": 0.00024275223099842291,
"loss": 0.3509,
"step": 1710
},
{
"epoch": 0.12,
"learning_rate": 0.00024080157980797484,
"loss": 0.348,
"step": 1720
},
{
"epoch": 0.12,
"learning_rate": 0.0002388468702439944,
"loss": 0.3556,
"step": 1730
},
{
"epoch": 0.12,
"learning_rate": 0.00023688829673379534,
"loss": 0.3561,
"step": 1740
},
{
"epoch": 0.12,
"learning_rate": 0.00023492605408902297,
"loss": 0.3532,
"step": 1750
},
{
"epoch": 0.12,
"learning_rate": 0.00023296033748627712,
"loss": 0.346,
"step": 1760
},
{
"epoch": 0.12,
"learning_rate": 0.0002309913424476986,
"loss": 0.3521,
"step": 1770
},
{
"epoch": 0.12,
"learning_rate": 0.00022901926482152138,
"loss": 0.3505,
"step": 1780
},
{
"epoch": 0.13,
"learning_rate": 0.00022704430076259246,
"loss": 0.3444,
"step": 1790
},
{
"epoch": 0.13,
"learning_rate": 0.00022506664671286087,
"loss": 0.3467,
"step": 1800
},
{
"epoch": 0.13,
"learning_rate": 0.00022308649938183864,
"loss": 0.3561,
"step": 1810
},
{
"epoch": 0.13,
"learning_rate": 0.00022110405572703466,
"loss": 0.3529,
"step": 1820
},
{
"epoch": 0.13,
"learning_rate": 0.00021911951293436416,
"loss": 0.348,
"step": 1830
},
{
"epoch": 0.13,
"learning_rate": 0.00021713306839853545,
"loss": 0.3449,
"step": 1840
},
{
"epoch": 0.13,
"learning_rate": 0.0002151449197034157,
"loss": 0.3525,
"step": 1850
},
{
"epoch": 0.13,
"learning_rate": 0.0002131552646023783,
"loss": 0.3431,
"step": 1860
},
{
"epoch": 0.13,
"learning_rate": 0.00021116430099863277,
"loss": 0.35,
"step": 1870
},
{
"epoch": 0.13,
"learning_rate": 0.0002091722269255404,
"loss": 0.3397,
"step": 1880
},
{
"epoch": 0.13,
"learning_rate": 0.0002071792405269165,
"loss": 0.3532,
"step": 1890
},
{
"epoch": 0.13,
"learning_rate": 0.00020518554003732167,
"loss": 0.3487,
"step": 1900
},
{
"epoch": 0.13,
"learning_rate": 0.00020319132376234462,
"loss": 0.3508,
"step": 1910
},
{
"epoch": 0.13,
"learning_rate": 0.00020119679005887702,
"loss": 0.35,
"step": 1920
},
{
"epoch": 0.14,
"learning_rate": 0.00019920213731538394,
"loss": 0.3479,
"step": 1930
},
{
"epoch": 0.14,
"learning_rate": 0.00019720756393217098,
"loss": 0.3439,
"step": 1940
},
{
"epoch": 0.14,
"learning_rate": 0.00019521326830164998,
"loss": 0.337,
"step": 1950
},
{
"epoch": 0.14,
"learning_rate": 0.00019321944878860587,
"loss": 0.3419,
"step": 1960
},
{
"epoch": 0.14,
"learning_rate": 0.000191226303710466,
"loss": 0.3508,
"step": 1970
},
{
"epoch": 0.14,
"learning_rate": 0.00018923403131757439,
"loss": 0.3474,
"step": 1980
},
{
"epoch": 0.14,
"learning_rate": 0.00018724282977347235,
"loss": 0.3451,
"step": 1990
},
{
"epoch": 0.14,
"learning_rate": 0.00018525289713518817,
"loss": 0.3449,
"step": 2000
},
{
"epoch": 0.14,
"learning_rate": 0.00018326443133353693,
"loss": 0.3437,
"step": 2010
},
{
"epoch": 0.14,
"learning_rate": 0.00018127763015343332,
"loss": 0.3425,
"step": 2020
},
{
"epoch": 0.14,
"learning_rate": 0.00017929269121421857,
"loss": 0.3478,
"step": 2030
},
{
"epoch": 0.14,
"learning_rate": 0.00017730981195000406,
"loss": 0.3408,
"step": 2040
},
{
"epoch": 0.14,
"learning_rate": 0.00017532918959003353,
"loss": 0.3452,
"step": 2050
},
{
"epoch": 0.14,
"learning_rate": 0.00017335102113906505,
"loss": 0.3395,
"step": 2060
},
{
"epoch": 0.15,
"learning_rate": 0.00017137550335777612,
"loss": 0.3462,
"step": 2070
},
{
"epoch": 0.15,
"learning_rate": 0.0001694028327431924,
"loss": 0.3397,
"step": 2080
},
{
"epoch": 0.15,
"learning_rate": 0.0001674332055091431,
"loss": 0.3434,
"step": 2090
},
{
"epoch": 0.15,
"learning_rate": 0.0001654668175667442,
"loss": 0.3431,
"step": 2100
},
{
"epoch": 0.15,
"learning_rate": 0.00016350386450491208,
"loss": 0.3326,
"step": 2110
},
{
"epoch": 0.15,
"learning_rate": 0.00016154454157090884,
"loss": 0.3392,
"step": 2120
},
{
"epoch": 0.15,
"learning_rate": 0.00015958904365092225,
"loss": 0.339,
"step": 2130
},
{
"epoch": 0.15,
"learning_rate": 0.00015763756525068065,
"loss": 0.336,
"step": 2140
},
{
"epoch": 0.15,
"learning_rate": 0.00015569030047610656,
"loss": 0.3363,
"step": 2150
},
{
"epoch": 0.15,
"learning_rate": 0.0001537474430140096,
"loss": 0.3373,
"step": 2160
},
{
"epoch": 0.15,
"learning_rate": 0.0001518091861128213,
"loss": 0.3419,
"step": 2170
},
{
"epoch": 0.15,
"learning_rate": 0.00014987572256337336,
"loss": 0.3493,
"step": 2180
},
{
"epoch": 0.15,
"learning_rate": 0.0001479472446797216,
"loss": 0.3334,
"step": 2190
},
{
"epoch": 0.15,
"learning_rate": 0.00014602394428001712,
"loss": 0.3384,
"step": 2200
},
{
"epoch": 0.16,
"learning_rate": 0.00014410601266742691,
"loss": 0.3435,
"step": 2210
},
{
"epoch": 0.16,
"learning_rate": 0.00014219364061110565,
"loss": 0.341,
"step": 2220
},
{
"epoch": 0.16,
"learning_rate": 0.00014028701832722104,
"loss": 0.3317,
"step": 2230
},
{
"epoch": 0.16,
"learning_rate": 0.00013838633546003302,
"loss": 0.3422,
"step": 2240
},
{
"epoch": 0.16,
"learning_rate": 0.00013649178106303115,
"loss": 0.3457,
"step": 2250
},
{
"epoch": 0.16,
"learning_rate": 0.00013460354358013,
"loss": 0.3356,
"step": 2260
},
{
"epoch": 0.16,
"learning_rate": 0.0001327218108269255,
"loss": 0.3377,
"step": 2270
},
{
"epoch": 0.16,
"learning_rate": 0.00013084676997201342,
"loss": 0.3304,
"step": 2280
},
{
"epoch": 0.16,
"learning_rate": 0.00012897860751837263,
"loss": 0.3413,
"step": 2290
},
{
"epoch": 0.16,
"learning_rate": 0.00012711750928481443,
"loss": 0.3433,
"step": 2300
},
{
"epoch": 0.16,
"learning_rate": 0.00012526366038749956,
"loss": 0.3419,
"step": 2310
},
{
"epoch": 0.16,
"learning_rate": 0.000123417245221526,
"loss": 0.3439,
"step": 2320
},
{
"epoch": 0.16,
"learning_rate": 0.00012157844744258722,
"loss": 0.3439,
"step": 2330
},
{
"epoch": 0.16,
"learning_rate": 0.00011974744994870517,
"loss": 0.3406,
"step": 2340
},
{
"epoch": 0.16,
"learning_rate": 0.00011792443486203788,
"loss": 0.3424,
"step": 2350
},
{
"epoch": 0.17,
"learning_rate": 0.00011610958351076458,
"loss": 0.34,
"step": 2360
},
{
"epoch": 0.17,
"learning_rate": 0.00011430307641104971,
"loss": 0.3345,
"step": 2370
},
{
"epoch": 0.17,
"learning_rate": 0.00011250509324908767,
"loss": 0.3411,
"step": 2380
},
{
"epoch": 0.17,
"learning_rate": 0.00011071581286323,
"loss": 0.3393,
"step": 2390
},
{
"epoch": 0.17,
"learning_rate": 0.00010893541322619732,
"loss": 0.3337,
"step": 2400
},
{
"epoch": 0.17,
"learning_rate": 0.00010716407142737659,
"loss": 0.3428,
"step": 2410
},
{
"epoch": 0.17,
"learning_rate": 0.00010540196365520754,
"loss": 0.3406,
"step": 2420
},
{
"epoch": 0.17,
"learning_rate": 0.00010364926517965692,
"loss": 0.3411,
"step": 2430
},
{
"epoch": 0.17,
"learning_rate": 0.0001019061503347858,
"loss": 0.3356,
"step": 2440
},
{
"epoch": 0.17,
"learning_rate": 0.00010017279250140891,
"loss": 0.3342,
"step": 2450
},
{
"epoch": 0.17,
"learning_rate": 9.844936408984924e-05,
"loss": 0.3381,
"step": 2460
},
{
"epoch": 0.17,
"learning_rate": 9.673603652278904e-05,
"loss": 0.3324,
"step": 2470
},
{
"epoch": 0.17,
"learning_rate": 9.503298021821905e-05,
"loss": 0.3255,
"step": 2480
},
{
"epoch": 0.17,
"learning_rate": 9.334036457248774e-05,
"loss": 0.3333,
"step": 2490
},
{
"epoch": 0.18,
"learning_rate": 9.165835794345205e-05,
"loss": 0.33,
"step": 2500
},
{
"epoch": 0.18,
"learning_rate": 8.99871276337315e-05,
"loss": 0.3396,
"step": 2510
},
{
"epoch": 0.18,
"learning_rate": 8.832683987406746e-05,
"loss": 0.3321,
"step": 2520
},
{
"epoch": 0.18,
"learning_rate": 8.667765980678851e-05,
"loss": 0.3279,
"step": 2530
},
{
"epoch": 0.18,
"learning_rate": 8.503975146938444e-05,
"loss": 0.3337,
"step": 2540
},
{
"epoch": 0.18,
"learning_rate": 8.341327777819035e-05,
"loss": 0.3369,
"step": 2550
},
{
"epoch": 0.18,
"learning_rate": 8.179840051218167e-05,
"loss": 0.3326,
"step": 2560
},
{
"epoch": 0.18,
"learning_rate": 8.019528029688286e-05,
"loss": 0.3292,
"step": 2570
},
{
"epoch": 0.18,
"learning_rate": 7.860407658839049e-05,
"loss": 0.3377,
"step": 2580
},
{
"epoch": 0.18,
"learning_rate": 7.702494765751285e-05,
"loss": 0.333,
"step": 2590
},
{
"epoch": 0.18,
"learning_rate": 7.545805057402733e-05,
"loss": 0.3248,
"step": 2600
},
{
"epoch": 0.18,
"learning_rate": 7.390354119105722e-05,
"loss": 0.3357,
"step": 2610
},
{
"epoch": 0.18,
"learning_rate": 7.236157412956994e-05,
"loss": 0.3326,
"step": 2620
},
{
"epoch": 0.18,
"learning_rate": 7.0832302762997e-05,
"loss": 0.328,
"step": 2630
},
{
"epoch": 0.19,
"learning_rate": 6.93158792019789e-05,
"loss": 0.3235,
"step": 2640
},
{
"epoch": 0.19,
"learning_rate": 6.781245427923522e-05,
"loss": 0.3293,
"step": 2650
},
{
"epoch": 0.19,
"learning_rate": 6.632217753456174e-05,
"loss": 0.3238,
"step": 2660
},
{
"epoch": 0.19,
"learning_rate": 6.484519719995647e-05,
"loss": 0.3301,
"step": 2670
},
{
"epoch": 0.19,
"learning_rate": 6.338166018487555e-05,
"loss": 0.3273,
"step": 2680
},
{
"epoch": 0.19,
"learning_rate": 6.193171206162065e-05,
"loss": 0.3305,
"step": 2690
},
{
"epoch": 0.19,
"learning_rate": 6.0495497050859574e-05,
"loss": 0.3335,
"step": 2700
},
{
"epoch": 0.19,
"learning_rate": 5.907315800728106e-05,
"loss": 0.3396,
"step": 2710
},
{
"epoch": 0.19,
"learning_rate": 5.766483640538587e-05,
"loss": 0.3281,
"step": 2720
},
{
"epoch": 0.19,
"learning_rate": 5.62706723254145e-05,
"loss": 0.3255,
"step": 2730
},
{
"epoch": 0.19,
"learning_rate": 5.489080443941415e-05,
"loss": 0.3336,
"step": 2740
},
{
"epoch": 0.19,
"learning_rate": 5.352536999744557e-05,
"loss": 0.3233,
"step": 2750
},
{
"epoch": 0.19,
"learning_rate": 5.217450481393129e-05,
"loss": 0.3273,
"step": 2760
},
{
"epoch": 0.19,
"learning_rate": 5.083834325414667e-05,
"loss": 0.3329,
"step": 2770
},
{
"epoch": 0.2,
"learning_rate": 4.951701822085515e-05,
"loss": 0.3241,
"step": 2780
},
{
"epoch": 0.2,
"learning_rate": 4.821066114108892e-05,
"loss": 0.3346,
"step": 2790
},
{
"epoch": 0.2,
"learning_rate": 4.69194019530764e-05,
"loss": 0.3358,
"step": 2800
},
{
"epoch": 0.2,
"learning_rate": 4.564336909331768e-05,
"loss": 0.3275,
"step": 2810
},
{
"epoch": 0.2,
"learning_rate": 4.438268948380972e-05,
"loss": 0.3245,
"step": 2820
},
{
"epoch": 0.2,
"learning_rate": 4.3137488519421656e-05,
"loss": 0.33,
"step": 2830
},
{
"epoch": 0.2,
"learning_rate": 4.1907890055422286e-05,
"loss": 0.326,
"step": 2840
},
{
"epoch": 0.2,
"learning_rate": 4.069401639516075e-05,
"loss": 0.3262,
"step": 2850
},
{
"epoch": 0.2,
"learning_rate": 3.949598827790155e-05,
"loss": 0.3266,
"step": 2860
},
{
"epoch": 0.2,
"learning_rate": 3.831392486681495e-05,
"loss": 0.3228,
"step": 2870
},
{
"epoch": 0.2,
"learning_rate": 3.714794373712431e-05,
"loss": 0.3327,
"step": 2880
},
{
"epoch": 0.2,
"learning_rate": 3.5998160864411476e-05,
"loss": 0.3171,
"step": 2890
},
{
"epoch": 0.2,
"learning_rate": 3.486469061308093e-05,
"loss": 0.3197,
"step": 2900
},
{
"epoch": 0.2,
"learning_rate": 3.3747645724984544e-05,
"loss": 0.3268,
"step": 2910
},
{
"epoch": 0.2,
"learning_rate": 3.264713730820768e-05,
"loss": 0.3254,
"step": 2920
},
{
"epoch": 0.21,
"learning_rate": 3.156327482601742e-05,
"loss": 0.3251,
"step": 2930
},
{
"epoch": 0.21,
"learning_rate": 3.0496166085974943e-05,
"loss": 0.3302,
"step": 2940
},
{
"epoch": 0.21,
"learning_rate": 2.9445917229212193e-05,
"loss": 0.3176,
"step": 2950
},
{
"epoch": 0.21,
"learning_rate": 2.8412632719874532e-05,
"loss": 0.3203,
"step": 2960
},
{
"epoch": 0.21,
"learning_rate": 2.7396415334729964e-05,
"loss": 0.3183,
"step": 2970
},
{
"epoch": 0.21,
"learning_rate": 2.6397366152946523e-05,
"loss": 0.3285,
"step": 2980
},
{
"epoch": 0.21,
"learning_rate": 2.5415584546038096e-05,
"loss": 0.3279,
"step": 2990
},
{
"epoch": 0.21,
"learning_rate": 2.4451168167980497e-05,
"loss": 0.3186,
"step": 3000
},
{
"epoch": 0.21,
"learning_rate": 2.350421294549825e-05,
"loss": 0.3222,
"step": 3010
},
{
"epoch": 0.21,
"learning_rate": 2.2574813068522894e-05,
"loss": 0.3271,
"step": 3020
},
{
"epoch": 0.21,
"learning_rate": 2.166306098082451e-05,
"loss": 0.3253,
"step": 3030
},
{
"epoch": 0.21,
"learning_rate": 2.0769047370816553e-05,
"loss": 0.3234,
"step": 3040
},
{
"epoch": 0.21,
"learning_rate": 1.989286116253557e-05,
"loss": 0.3256,
"step": 3050
},
{
"epoch": 0.21,
"learning_rate": 1.903458950679613e-05,
"loss": 0.3234,
"step": 3060
},
{
"epoch": 0.22,
"learning_rate": 1.8194317772522362e-05,
"loss": 0.3228,
"step": 3070
},
{
"epoch": 0.22,
"learning_rate": 1.7372129538256667e-05,
"loss": 0.32,
"step": 3080
},
{
"epoch": 0.22,
"learning_rate": 1.6568106583846378e-05,
"loss": 0.3198,
"step": 3090
},
{
"epoch": 0.22,
"learning_rate": 1.5782328882309484e-05,
"loss": 0.331,
"step": 3100
},
{
"epoch": 0.22,
"learning_rate": 1.5014874591880157e-05,
"loss": 0.3264,
"step": 3110
},
{
"epoch": 0.22,
"learning_rate": 1.4265820048234447e-05,
"loss": 0.3253,
"step": 3120
},
{
"epoch": 0.22,
"learning_rate": 1.3535239756897566e-05,
"loss": 0.3187,
"step": 3130
},
{
"epoch": 0.22,
"learning_rate": 1.2823206385833187e-05,
"loss": 0.3199,
"step": 3140
},
{
"epoch": 0.22,
"learning_rate": 1.212979075821532e-05,
"loss": 0.3225,
"step": 3150
},
{
"epoch": 0.22,
"learning_rate": 1.1455061845383852e-05,
"loss": 0.3195,
"step": 3160
},
{
"epoch": 0.22,
"learning_rate": 1.0799086759984333e-05,
"loss": 0.3163,
"step": 3170
},
{
"epoch": 0.22,
"learning_rate": 1.016193074929237e-05,
"loss": 0.3217,
"step": 3180
},
{
"epoch": 0.22,
"learning_rate": 9.543657188723876e-06,
"loss": 0.3204,
"step": 3190
},
{
"epoch": 0.22,
"learning_rate": 8.944327575531275e-06,
"loss": 0.3196,
"step": 3200
},
{
"epoch": 0.23,
"learning_rate": 8.364001522686726e-06,
"loss": 0.3326,
"step": 3210
},
{
"epoch": 0.23,
"learning_rate": 7.802736752952533e-06,
"loss": 0.3153,
"step": 3220
},
{
"epoch": 0.23,
"learning_rate": 7.260589093139736e-06,
"loss": 0.323,
"step": 3230
},
{
"epoch": 0.23,
"learning_rate": 6.737612468555221e-06,
"loss": 0.3192,
"step": 3240
},
{
"epoch": 0.23,
"learning_rate": 6.2338588976380115e-06,
"loss": 0.3166,
"step": 3250
},
{
"epoch": 0.23,
"learning_rate": 5.749378486785162e-06,
"loss": 0.3134,
"step": 3260
},
{
"epoch": 0.23,
"learning_rate": 5.2842194253679424e-06,
"loss": 0.3225,
"step": 3270
},
{
"epoch": 0.23,
"learning_rate": 4.8384279809385426e-06,
"loss": 0.3251,
"step": 3280
},
{
"epoch": 0.23,
"learning_rate": 4.41204849462804e-06,
"loss": 0.3228,
"step": 3290
},
{
"epoch": 0.23,
"learning_rate": 4.005123376735997e-06,
"loss": 0.3181,
"step": 3300
},
{
"epoch": 0.23,
"learning_rate": 3.617693102512032e-06,
"loss": 0.3135,
"step": 3310
},
{
"epoch": 0.23,
"learning_rate": 3.2497962081299514e-06,
"loss": 0.3196,
"step": 3320
},
{
"epoch": 0.23,
"learning_rate": 2.9014692868546633e-06,
"loss": 0.3143,
"step": 3330
},
{
"epoch": 0.23,
"learning_rate": 2.572746985402419e-06,
"loss": 0.3124,
"step": 3340
},
{
"epoch": 0.24,
"learning_rate": 2.2636620004946154e-06,
"loss": 0.3157,
"step": 3350
},
{
"epoch": 0.24,
"learning_rate": 1.974245075605574e-06,
"loss": 0.3225,
"step": 3360
},
{
"epoch": 0.24,
"learning_rate": 1.7045249979046995e-06,
"loss": 0.3165,
"step": 3370
},
{
"epoch": 0.24,
"learning_rate": 1.4545285953929677e-06,
"loss": 0.3178,
"step": 3380
},
{
"epoch": 0.24,
"learning_rate": 1.224280734234573e-06,
"loss": 0.3129,
"step": 3390
},
{
"epoch": 0.24,
"learning_rate": 1.013804316283573e-06,
"loss": 0.3187,
"step": 3400
},
{
"epoch": 0.24,
"learning_rate": 8.231202768059332e-07,
"loss": 0.3178,
"step": 3410
},
{
"epoch": 0.24,
"learning_rate": 6.522475823970808e-07,
"loss": 0.3252,
"step": 3420
},
{
"epoch": 0.24,
"learning_rate": 5.012032290955037e-07,
"loss": 0.311,
"step": 3430
},
{
"epoch": 0.24,
"learning_rate": 3.7000224069216883e-07,
"loss": 0.3253,
"step": 3440
},
{
"epoch": 0.24,
"learning_rate": 2.586576672361396e-07,
"loss": 0.3205,
"step": 3450
},
{
"epoch": 0.24,
"learning_rate": 1.671805837365703e-07,
"loss": 0.3196,
"step": 3460
},
{
"epoch": 0.24,
"learning_rate": 9.558008906112026e-08,
"loss": 0.3166,
"step": 3470
},
{
"epoch": 0.24,
"learning_rate": 4.386330503090008e-08,
"loss": 0.3255,
"step": 3480
},
{
"epoch": 0.24,
"learning_rate": 1.2035375712105001e-08,
"loss": 0.3181,
"step": 3490
},
{
"epoch": 0.25,
"learning_rate": 9.946690433526584e-11,
"loss": 0.3212,
"step": 3500
}
],
"max_steps": 3500,
"num_train_epochs": 1,
"total_flos": 2.21324249137152e+18,
"trial_name": null,
"trial_params": null
}