PoetryGPT / checkpoint-2000 /trainer_state.json
lichi.jesse
update model
173fe9d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5242463958060288,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.999e-06,
"loss": 6.1641,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 9.994000000000001e-06,
"loss": 5.275,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 9.989e-06,
"loss": 4.8629,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 9.984e-06,
"loss": 4.8023,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 9.979e-06,
"loss": 4.7687,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 9.974e-06,
"loss": 4.7188,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 9.969e-06,
"loss": 4.6258,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 9.964e-06,
"loss": 4.6254,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 9.959e-06,
"loss": 4.5867,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 9.954e-06,
"loss": 4.6207,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 9.949e-06,
"loss": 4.6086,
"step": 55
},
{
"epoch": 0.02,
"learning_rate": 9.944e-06,
"loss": 4.5559,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 9.939000000000001e-06,
"loss": 4.5836,
"step": 65
},
{
"epoch": 0.02,
"learning_rate": 9.934e-06,
"loss": 4.5121,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 9.929000000000001e-06,
"loss": 4.5234,
"step": 75
},
{
"epoch": 0.02,
"learning_rate": 9.924e-06,
"loss": 4.4992,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 9.919000000000001e-06,
"loss": 4.4891,
"step": 85
},
{
"epoch": 0.02,
"learning_rate": 9.914e-06,
"loss": 4.4688,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 9.909000000000001e-06,
"loss": 4.4836,
"step": 95
},
{
"epoch": 0.03,
"learning_rate": 9.904e-06,
"loss": 4.4363,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 9.899000000000001e-06,
"loss": 4.4215,
"step": 105
},
{
"epoch": 0.03,
"learning_rate": 9.894e-06,
"loss": 4.4469,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 9.889000000000001e-06,
"loss": 4.3793,
"step": 115
},
{
"epoch": 0.03,
"learning_rate": 9.884e-06,
"loss": 4.3934,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 9.879000000000001e-06,
"loss": 4.3309,
"step": 125
},
{
"epoch": 0.03,
"learning_rate": 9.874e-06,
"loss": 4.3875,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 9.869000000000002e-06,
"loss": 4.4262,
"step": 135
},
{
"epoch": 0.04,
"learning_rate": 9.864e-06,
"loss": 4.4285,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 9.859e-06,
"loss": 4.3965,
"step": 145
},
{
"epoch": 0.04,
"learning_rate": 9.854000000000001e-06,
"loss": 4.3359,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 9.849e-06,
"loss": 4.4348,
"step": 155
},
{
"epoch": 0.04,
"learning_rate": 9.844000000000001e-06,
"loss": 4.3152,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 9.839e-06,
"loss": 4.3402,
"step": 165
},
{
"epoch": 0.04,
"learning_rate": 9.834000000000001e-06,
"loss": 4.316,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 9.829e-06,
"loss": 4.2969,
"step": 175
},
{
"epoch": 0.05,
"learning_rate": 9.824000000000001e-06,
"loss": 4.2867,
"step": 180
},
{
"epoch": 0.05,
"learning_rate": 9.819e-06,
"loss": 4.3902,
"step": 185
},
{
"epoch": 0.05,
"learning_rate": 9.814000000000001e-06,
"loss": 4.2656,
"step": 190
},
{
"epoch": 0.05,
"learning_rate": 9.809e-06,
"loss": 4.3797,
"step": 195
},
{
"epoch": 0.05,
"learning_rate": 9.804000000000001e-06,
"loss": 4.2863,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 9.799e-06,
"loss": 4.275,
"step": 205
},
{
"epoch": 0.06,
"learning_rate": 9.794000000000001e-06,
"loss": 4.2891,
"step": 210
},
{
"epoch": 0.06,
"learning_rate": 9.789e-06,
"loss": 4.3059,
"step": 215
},
{
"epoch": 0.06,
"learning_rate": 9.784000000000002e-06,
"loss": 4.3832,
"step": 220
},
{
"epoch": 0.06,
"learning_rate": 9.779e-06,
"loss": 4.3055,
"step": 225
},
{
"epoch": 0.06,
"learning_rate": 9.774000000000002e-06,
"loss": 4.3008,
"step": 230
},
{
"epoch": 0.06,
"learning_rate": 9.769e-06,
"loss": 4.25,
"step": 235
},
{
"epoch": 0.06,
"learning_rate": 9.764000000000002e-06,
"loss": 4.2676,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 9.759000000000001e-06,
"loss": 4.2422,
"step": 245
},
{
"epoch": 0.07,
"learning_rate": 9.754000000000002e-06,
"loss": 4.3137,
"step": 250
},
{
"epoch": 0.07,
"learning_rate": 9.749000000000001e-06,
"loss": 4.2555,
"step": 255
},
{
"epoch": 0.07,
"learning_rate": 9.744000000000002e-06,
"loss": 4.2637,
"step": 260
},
{
"epoch": 0.07,
"learning_rate": 9.739000000000001e-06,
"loss": 4.2996,
"step": 265
},
{
"epoch": 0.07,
"learning_rate": 9.734000000000002e-06,
"loss": 4.3176,
"step": 270
},
{
"epoch": 0.07,
"learning_rate": 9.729000000000001e-06,
"loss": 4.252,
"step": 275
},
{
"epoch": 0.07,
"learning_rate": 9.724e-06,
"loss": 4.266,
"step": 280
},
{
"epoch": 0.07,
"learning_rate": 9.719000000000001e-06,
"loss": 4.1748,
"step": 285
},
{
"epoch": 0.08,
"learning_rate": 9.714e-06,
"loss": 4.3008,
"step": 290
},
{
"epoch": 0.08,
"learning_rate": 9.709000000000001e-06,
"loss": 4.268,
"step": 295
},
{
"epoch": 0.08,
"learning_rate": 9.704e-06,
"loss": 4.2523,
"step": 300
},
{
"epoch": 0.08,
"learning_rate": 9.699e-06,
"loss": 4.3445,
"step": 305
},
{
"epoch": 0.08,
"learning_rate": 9.694e-06,
"loss": 4.298,
"step": 310
},
{
"epoch": 0.08,
"learning_rate": 9.689e-06,
"loss": 4.2809,
"step": 315
},
{
"epoch": 0.08,
"learning_rate": 9.684e-06,
"loss": 4.2234,
"step": 320
},
{
"epoch": 0.09,
"learning_rate": 9.679e-06,
"loss": 4.227,
"step": 325
},
{
"epoch": 0.09,
"learning_rate": 9.674000000000001e-06,
"loss": 4.2605,
"step": 330
},
{
"epoch": 0.09,
"learning_rate": 9.669e-06,
"loss": 4.2268,
"step": 335
},
{
"epoch": 0.09,
"learning_rate": 9.664000000000001e-06,
"loss": 4.1734,
"step": 340
},
{
"epoch": 0.09,
"learning_rate": 9.659e-06,
"loss": 4.2355,
"step": 345
},
{
"epoch": 0.09,
"learning_rate": 9.654000000000001e-06,
"loss": 4.2123,
"step": 350
},
{
"epoch": 0.09,
"learning_rate": 9.649e-06,
"loss": 4.1396,
"step": 355
},
{
"epoch": 0.09,
"learning_rate": 9.644000000000001e-06,
"loss": 4.1869,
"step": 360
},
{
"epoch": 0.1,
"learning_rate": 9.639e-06,
"loss": 4.2148,
"step": 365
},
{
"epoch": 0.1,
"learning_rate": 9.634000000000001e-06,
"loss": 4.1201,
"step": 370
},
{
"epoch": 0.1,
"learning_rate": 9.629e-06,
"loss": 4.1891,
"step": 375
},
{
"epoch": 0.1,
"learning_rate": 9.624000000000001e-06,
"loss": 4.118,
"step": 380
},
{
"epoch": 0.1,
"learning_rate": 9.619e-06,
"loss": 4.1359,
"step": 385
},
{
"epoch": 0.1,
"learning_rate": 9.614000000000001e-06,
"loss": 4.1469,
"step": 390
},
{
"epoch": 0.1,
"learning_rate": 9.609e-06,
"loss": 4.1941,
"step": 395
},
{
"epoch": 0.1,
"learning_rate": 9.604000000000002e-06,
"loss": 4.1219,
"step": 400
},
{
"epoch": 0.11,
"learning_rate": 9.599e-06,
"loss": 4.0951,
"step": 405
},
{
"epoch": 0.11,
"learning_rate": 9.594000000000002e-06,
"loss": 4.1387,
"step": 410
},
{
"epoch": 0.11,
"learning_rate": 9.589000000000001e-06,
"loss": 4.0973,
"step": 415
},
{
"epoch": 0.11,
"learning_rate": 9.584000000000002e-06,
"loss": 4.1551,
"step": 420
},
{
"epoch": 0.11,
"learning_rate": 9.579000000000001e-06,
"loss": 4.1883,
"step": 425
},
{
"epoch": 0.11,
"learning_rate": 9.574000000000002e-06,
"loss": 4.2137,
"step": 430
},
{
"epoch": 0.11,
"learning_rate": 9.569000000000001e-06,
"loss": 4.1748,
"step": 435
},
{
"epoch": 0.12,
"learning_rate": 9.564e-06,
"loss": 4.1664,
"step": 440
},
{
"epoch": 0.12,
"learning_rate": 9.559000000000001e-06,
"loss": 4.0812,
"step": 445
},
{
"epoch": 0.12,
"learning_rate": 9.554e-06,
"loss": 4.2215,
"step": 450
},
{
"epoch": 0.12,
"learning_rate": 9.549000000000001e-06,
"loss": 4.175,
"step": 455
},
{
"epoch": 0.12,
"learning_rate": 9.544e-06,
"loss": 4.0766,
"step": 460
},
{
"epoch": 0.12,
"learning_rate": 9.539e-06,
"loss": 4.0873,
"step": 465
},
{
"epoch": 0.12,
"learning_rate": 9.534e-06,
"loss": 4.1316,
"step": 470
},
{
"epoch": 0.12,
"learning_rate": 9.529e-06,
"loss": 4.108,
"step": 475
},
{
"epoch": 0.13,
"learning_rate": 9.524e-06,
"loss": 4.1691,
"step": 480
},
{
"epoch": 0.13,
"learning_rate": 9.519e-06,
"loss": 4.1154,
"step": 485
},
{
"epoch": 0.13,
"learning_rate": 9.514e-06,
"loss": 4.1035,
"step": 490
},
{
"epoch": 0.13,
"learning_rate": 9.509e-06,
"loss": 4.1293,
"step": 495
},
{
"epoch": 0.13,
"learning_rate": 9.504e-06,
"loss": 4.1734,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 9.499e-06,
"loss": 4.0504,
"step": 505
},
{
"epoch": 0.13,
"learning_rate": 9.494000000000001e-06,
"loss": 4.048,
"step": 510
},
{
"epoch": 0.13,
"learning_rate": 9.489e-06,
"loss": 4.1066,
"step": 515
},
{
"epoch": 0.14,
"learning_rate": 9.484000000000001e-06,
"loss": 4.1354,
"step": 520
},
{
"epoch": 0.14,
"learning_rate": 9.479e-06,
"loss": 4.1238,
"step": 525
},
{
"epoch": 0.14,
"learning_rate": 9.474000000000001e-06,
"loss": 4.1232,
"step": 530
},
{
"epoch": 0.14,
"learning_rate": 9.469e-06,
"loss": 4.1252,
"step": 535
},
{
"epoch": 0.14,
"learning_rate": 9.464000000000001e-06,
"loss": 4.0975,
"step": 540
},
{
"epoch": 0.14,
"learning_rate": 9.459e-06,
"loss": 4.1111,
"step": 545
},
{
"epoch": 0.14,
"learning_rate": 9.454000000000001e-06,
"loss": 4.0047,
"step": 550
},
{
"epoch": 0.15,
"learning_rate": 9.449e-06,
"loss": 4.0992,
"step": 555
},
{
"epoch": 0.15,
"learning_rate": 9.444000000000001e-06,
"loss": 4.0734,
"step": 560
},
{
"epoch": 0.15,
"learning_rate": 9.439e-06,
"loss": 4.0809,
"step": 565
},
{
"epoch": 0.15,
"learning_rate": 9.434000000000001e-06,
"loss": 4.101,
"step": 570
},
{
"epoch": 0.15,
"learning_rate": 9.429e-06,
"loss": 4.0662,
"step": 575
},
{
"epoch": 0.15,
"learning_rate": 9.424000000000002e-06,
"loss": 4.1041,
"step": 580
},
{
"epoch": 0.15,
"learning_rate": 9.419e-06,
"loss": 4.0564,
"step": 585
},
{
"epoch": 0.15,
"learning_rate": 9.414000000000002e-06,
"loss": 4.0986,
"step": 590
},
{
"epoch": 0.16,
"learning_rate": 9.409000000000001e-06,
"loss": 4.0309,
"step": 595
},
{
"epoch": 0.16,
"learning_rate": 9.404e-06,
"loss": 4.0605,
"step": 600
},
{
"epoch": 0.16,
"learning_rate": 9.399000000000001e-06,
"loss": 4.0857,
"step": 605
},
{
"epoch": 0.16,
"learning_rate": 9.394e-06,
"loss": 4.1307,
"step": 610
},
{
"epoch": 0.16,
"learning_rate": 9.389000000000001e-06,
"loss": 4.06,
"step": 615
},
{
"epoch": 0.16,
"learning_rate": 9.384e-06,
"loss": 4.0039,
"step": 620
},
{
"epoch": 0.16,
"learning_rate": 9.379000000000001e-06,
"loss": 4.0258,
"step": 625
},
{
"epoch": 0.17,
"learning_rate": 9.374e-06,
"loss": 4.0738,
"step": 630
},
{
"epoch": 0.17,
"learning_rate": 9.369e-06,
"loss": 4.0551,
"step": 635
},
{
"epoch": 0.17,
"learning_rate": 9.364e-06,
"loss": 4.0518,
"step": 640
},
{
"epoch": 0.17,
"learning_rate": 9.359e-06,
"loss": 4.0584,
"step": 645
},
{
"epoch": 0.17,
"learning_rate": 9.354e-06,
"loss": 4.1109,
"step": 650
},
{
"epoch": 0.17,
"learning_rate": 9.349e-06,
"loss": 3.9898,
"step": 655
},
{
"epoch": 0.17,
"learning_rate": 9.344e-06,
"loss": 4.1406,
"step": 660
},
{
"epoch": 0.17,
"learning_rate": 9.339e-06,
"loss": 4.0725,
"step": 665
},
{
"epoch": 0.18,
"learning_rate": 9.334e-06,
"loss": 4.0207,
"step": 670
},
{
"epoch": 0.18,
"learning_rate": 9.329e-06,
"loss": 4.0826,
"step": 675
},
{
"epoch": 0.18,
"learning_rate": 9.324000000000001e-06,
"loss": 4.1059,
"step": 680
},
{
"epoch": 0.18,
"learning_rate": 9.319e-06,
"loss": 3.9967,
"step": 685
},
{
"epoch": 0.18,
"learning_rate": 9.314000000000001e-06,
"loss": 4.0328,
"step": 690
},
{
"epoch": 0.18,
"learning_rate": 9.309e-06,
"loss": 3.9918,
"step": 695
},
{
"epoch": 0.18,
"learning_rate": 9.304000000000001e-06,
"loss": 4.0434,
"step": 700
},
{
"epoch": 0.18,
"learning_rate": 9.299e-06,
"loss": 3.9584,
"step": 705
},
{
"epoch": 0.19,
"learning_rate": 9.294000000000001e-06,
"loss": 4.0551,
"step": 710
},
{
"epoch": 0.19,
"learning_rate": 9.289e-06,
"loss": 3.9684,
"step": 715
},
{
"epoch": 0.19,
"learning_rate": 9.284000000000001e-06,
"loss": 4.0221,
"step": 720
},
{
"epoch": 0.19,
"learning_rate": 9.279e-06,
"loss": 3.985,
"step": 725
},
{
"epoch": 0.19,
"learning_rate": 9.274000000000001e-06,
"loss": 4.0648,
"step": 730
},
{
"epoch": 0.19,
"learning_rate": 9.269e-06,
"loss": 4.0109,
"step": 735
},
{
"epoch": 0.19,
"learning_rate": 9.264000000000001e-06,
"loss": 3.9553,
"step": 740
},
{
"epoch": 0.2,
"learning_rate": 9.259e-06,
"loss": 3.9904,
"step": 745
},
{
"epoch": 0.2,
"learning_rate": 9.254000000000002e-06,
"loss": 3.9719,
"step": 750
},
{
"epoch": 0.2,
"learning_rate": 9.249e-06,
"loss": 3.8973,
"step": 755
},
{
"epoch": 0.2,
"learning_rate": 9.244e-06,
"loss": 3.9936,
"step": 760
},
{
"epoch": 0.2,
"learning_rate": 9.239e-06,
"loss": 3.9498,
"step": 765
},
{
"epoch": 0.2,
"learning_rate": 9.234e-06,
"loss": 3.9557,
"step": 770
},
{
"epoch": 0.2,
"learning_rate": 9.229000000000001e-06,
"loss": 3.9266,
"step": 775
},
{
"epoch": 0.2,
"learning_rate": 9.224e-06,
"loss": 3.9543,
"step": 780
},
{
"epoch": 0.21,
"learning_rate": 9.219000000000001e-06,
"loss": 3.9732,
"step": 785
},
{
"epoch": 0.21,
"learning_rate": 9.214e-06,
"loss": 3.9762,
"step": 790
},
{
"epoch": 0.21,
"learning_rate": 9.209000000000001e-06,
"loss": 4.0695,
"step": 795
},
{
"epoch": 0.21,
"learning_rate": 9.204e-06,
"loss": 3.9869,
"step": 800
},
{
"epoch": 0.21,
"learning_rate": 9.199000000000001e-06,
"loss": 4.0061,
"step": 805
},
{
"epoch": 0.21,
"learning_rate": 9.194e-06,
"loss": 4.0121,
"step": 810
},
{
"epoch": 0.21,
"learning_rate": 9.189000000000001e-06,
"loss": 3.9105,
"step": 815
},
{
"epoch": 0.21,
"learning_rate": 9.184e-06,
"loss": 3.8631,
"step": 820
},
{
"epoch": 0.22,
"learning_rate": 9.179000000000001e-06,
"loss": 3.9498,
"step": 825
},
{
"epoch": 0.22,
"learning_rate": 9.174e-06,
"loss": 3.9451,
"step": 830
},
{
"epoch": 0.22,
"learning_rate": 9.169000000000001e-06,
"loss": 3.951,
"step": 835
},
{
"epoch": 0.22,
"learning_rate": 9.164e-06,
"loss": 3.9297,
"step": 840
},
{
"epoch": 0.22,
"learning_rate": 9.159000000000002e-06,
"loss": 3.9771,
"step": 845
},
{
"epoch": 0.22,
"learning_rate": 9.154e-06,
"loss": 4.0842,
"step": 850
},
{
"epoch": 0.22,
"learning_rate": 9.149000000000002e-06,
"loss": 3.8865,
"step": 855
},
{
"epoch": 0.23,
"learning_rate": 9.144000000000001e-06,
"loss": 3.9312,
"step": 860
},
{
"epoch": 0.23,
"learning_rate": 9.139000000000002e-06,
"loss": 3.8875,
"step": 865
},
{
"epoch": 0.23,
"learning_rate": 9.134000000000001e-06,
"loss": 4.0389,
"step": 870
},
{
"epoch": 0.23,
"learning_rate": 9.129000000000002e-06,
"loss": 3.9568,
"step": 875
},
{
"epoch": 0.23,
"learning_rate": 9.124000000000001e-06,
"loss": 3.9541,
"step": 880
},
{
"epoch": 0.23,
"learning_rate": 9.119000000000002e-06,
"loss": 3.9092,
"step": 885
},
{
"epoch": 0.23,
"learning_rate": 9.114000000000001e-06,
"loss": 3.9404,
"step": 890
},
{
"epoch": 0.23,
"learning_rate": 9.109e-06,
"loss": 3.9371,
"step": 895
},
{
"epoch": 0.24,
"learning_rate": 9.104000000000001e-06,
"loss": 3.9477,
"step": 900
},
{
"epoch": 0.24,
"learning_rate": 9.099e-06,
"loss": 3.9469,
"step": 905
},
{
"epoch": 0.24,
"learning_rate": 9.094000000000001e-06,
"loss": 3.9191,
"step": 910
},
{
"epoch": 0.24,
"learning_rate": 9.089e-06,
"loss": 3.9527,
"step": 915
},
{
"epoch": 0.24,
"learning_rate": 9.084e-06,
"loss": 3.8934,
"step": 920
},
{
"epoch": 0.24,
"learning_rate": 9.079e-06,
"loss": 3.9773,
"step": 925
},
{
"epoch": 0.24,
"learning_rate": 9.074e-06,
"loss": 3.823,
"step": 930
},
{
"epoch": 0.25,
"learning_rate": 9.069e-06,
"loss": 3.8857,
"step": 935
},
{
"epoch": 0.25,
"learning_rate": 9.064e-06,
"loss": 3.9092,
"step": 940
},
{
"epoch": 0.25,
"learning_rate": 9.059000000000001e-06,
"loss": 3.8338,
"step": 945
},
{
"epoch": 0.25,
"learning_rate": 9.054e-06,
"loss": 3.9457,
"step": 950
},
{
"epoch": 0.25,
"learning_rate": 9.049000000000001e-06,
"loss": 3.8869,
"step": 955
},
{
"epoch": 0.25,
"learning_rate": 9.044e-06,
"loss": 3.8594,
"step": 960
},
{
"epoch": 0.25,
"learning_rate": 9.039000000000001e-06,
"loss": 4.0318,
"step": 965
},
{
"epoch": 0.25,
"learning_rate": 9.034e-06,
"loss": 3.8469,
"step": 970
},
{
"epoch": 0.26,
"learning_rate": 9.029000000000001e-06,
"loss": 3.8367,
"step": 975
},
{
"epoch": 0.26,
"learning_rate": 9.024e-06,
"loss": 3.8814,
"step": 980
},
{
"epoch": 0.26,
"learning_rate": 9.019000000000001e-06,
"loss": 3.8818,
"step": 985
},
{
"epoch": 0.26,
"learning_rate": 9.014e-06,
"loss": 3.908,
"step": 990
},
{
"epoch": 0.26,
"learning_rate": 9.009000000000001e-06,
"loss": 3.9705,
"step": 995
},
{
"epoch": 0.26,
"learning_rate": 9.004e-06,
"loss": 3.9086,
"step": 1000
},
{
"epoch": 0.26,
"learning_rate": 8.999000000000001e-06,
"loss": 3.9795,
"step": 1005
},
{
"epoch": 0.26,
"learning_rate": 8.994e-06,
"loss": 3.8629,
"step": 1010
},
{
"epoch": 0.27,
"learning_rate": 8.989000000000002e-06,
"loss": 3.8287,
"step": 1015
},
{
"epoch": 0.27,
"learning_rate": 8.984e-06,
"loss": 3.8717,
"step": 1020
},
{
"epoch": 0.27,
"learning_rate": 8.979000000000002e-06,
"loss": 3.8865,
"step": 1025
},
{
"epoch": 0.27,
"learning_rate": 8.974e-06,
"loss": 3.8344,
"step": 1030
},
{
"epoch": 0.27,
"learning_rate": 8.969000000000002e-06,
"loss": 3.9541,
"step": 1035
},
{
"epoch": 0.27,
"learning_rate": 8.964000000000001e-06,
"loss": 3.8318,
"step": 1040
},
{
"epoch": 0.27,
"learning_rate": 8.959000000000002e-06,
"loss": 3.9328,
"step": 1045
},
{
"epoch": 0.28,
"learning_rate": 8.954000000000001e-06,
"loss": 3.8621,
"step": 1050
},
{
"epoch": 0.28,
"learning_rate": 8.949e-06,
"loss": 3.7871,
"step": 1055
},
{
"epoch": 0.28,
"learning_rate": 8.944000000000001e-06,
"loss": 3.8988,
"step": 1060
},
{
"epoch": 0.28,
"learning_rate": 8.939e-06,
"loss": 3.8232,
"step": 1065
},
{
"epoch": 0.28,
"learning_rate": 8.934000000000001e-06,
"loss": 3.8816,
"step": 1070
},
{
"epoch": 0.28,
"learning_rate": 8.929e-06,
"loss": 3.8775,
"step": 1075
},
{
"epoch": 0.28,
"learning_rate": 8.924e-06,
"loss": 3.8115,
"step": 1080
},
{
"epoch": 0.28,
"learning_rate": 8.919e-06,
"loss": 3.7941,
"step": 1085
},
{
"epoch": 0.29,
"learning_rate": 8.914e-06,
"loss": 3.8678,
"step": 1090
},
{
"epoch": 0.29,
"learning_rate": 8.909e-06,
"loss": 3.8215,
"step": 1095
},
{
"epoch": 0.29,
"learning_rate": 8.904e-06,
"loss": 3.79,
"step": 1100
},
{
"epoch": 0.29,
"learning_rate": 8.899e-06,
"loss": 3.8092,
"step": 1105
},
{
"epoch": 0.29,
"learning_rate": 8.894e-06,
"loss": 3.79,
"step": 1110
},
{
"epoch": 0.29,
"learning_rate": 8.889e-06,
"loss": 3.8162,
"step": 1115
},
{
"epoch": 0.29,
"learning_rate": 8.884e-06,
"loss": 3.8568,
"step": 1120
},
{
"epoch": 0.29,
"learning_rate": 8.879000000000001e-06,
"loss": 3.867,
"step": 1125
},
{
"epoch": 0.3,
"learning_rate": 8.874e-06,
"loss": 3.7988,
"step": 1130
},
{
"epoch": 0.3,
"learning_rate": 8.869000000000001e-06,
"loss": 3.8088,
"step": 1135
},
{
"epoch": 0.3,
"learning_rate": 8.864e-06,
"loss": 3.7711,
"step": 1140
},
{
"epoch": 0.3,
"learning_rate": 8.859000000000001e-06,
"loss": 3.7242,
"step": 1145
},
{
"epoch": 0.3,
"learning_rate": 8.854e-06,
"loss": 3.8512,
"step": 1150
},
{
"epoch": 0.3,
"learning_rate": 8.849000000000001e-06,
"loss": 3.8945,
"step": 1155
},
{
"epoch": 0.3,
"learning_rate": 8.844e-06,
"loss": 3.8687,
"step": 1160
},
{
"epoch": 0.31,
"learning_rate": 8.839000000000001e-06,
"loss": 3.7533,
"step": 1165
},
{
"epoch": 0.31,
"learning_rate": 8.834e-06,
"loss": 3.8707,
"step": 1170
},
{
"epoch": 0.31,
"learning_rate": 8.829000000000001e-06,
"loss": 3.8086,
"step": 1175
},
{
"epoch": 0.31,
"learning_rate": 8.824e-06,
"loss": 3.7467,
"step": 1180
},
{
"epoch": 0.31,
"learning_rate": 8.819000000000001e-06,
"loss": 3.8078,
"step": 1185
},
{
"epoch": 0.31,
"learning_rate": 8.814e-06,
"loss": 3.7465,
"step": 1190
},
{
"epoch": 0.31,
"learning_rate": 8.809000000000002e-06,
"loss": 3.7955,
"step": 1195
},
{
"epoch": 0.31,
"learning_rate": 8.804e-06,
"loss": 3.8281,
"step": 1200
},
{
"epoch": 0.32,
"learning_rate": 8.799000000000002e-06,
"loss": 3.8035,
"step": 1205
},
{
"epoch": 0.32,
"learning_rate": 8.794e-06,
"loss": 3.7963,
"step": 1210
},
{
"epoch": 0.32,
"learning_rate": 8.789e-06,
"loss": 3.8061,
"step": 1215
},
{
"epoch": 0.32,
"learning_rate": 8.784000000000001e-06,
"loss": 3.777,
"step": 1220
},
{
"epoch": 0.32,
"learning_rate": 8.779e-06,
"loss": 3.7582,
"step": 1225
},
{
"epoch": 0.32,
"learning_rate": 8.774000000000001e-06,
"loss": 3.7725,
"step": 1230
},
{
"epoch": 0.32,
"learning_rate": 8.769e-06,
"loss": 3.7516,
"step": 1235
},
{
"epoch": 0.33,
"learning_rate": 8.764e-06,
"loss": 3.8543,
"step": 1240
},
{
"epoch": 0.33,
"learning_rate": 8.759e-06,
"loss": 3.8566,
"step": 1245
},
{
"epoch": 0.33,
"learning_rate": 8.754e-06,
"loss": 3.7695,
"step": 1250
},
{
"epoch": 0.33,
"learning_rate": 8.749e-06,
"loss": 3.8271,
"step": 1255
},
{
"epoch": 0.33,
"learning_rate": 8.744e-06,
"loss": 3.773,
"step": 1260
},
{
"epoch": 0.33,
"learning_rate": 8.739e-06,
"loss": 3.7283,
"step": 1265
},
{
"epoch": 0.33,
"learning_rate": 8.734e-06,
"loss": 3.7822,
"step": 1270
},
{
"epoch": 0.33,
"learning_rate": 8.729e-06,
"loss": 3.7816,
"step": 1275
},
{
"epoch": 0.34,
"learning_rate": 8.724e-06,
"loss": 3.751,
"step": 1280
},
{
"epoch": 0.34,
"learning_rate": 8.719e-06,
"loss": 3.8271,
"step": 1285
},
{
"epoch": 0.34,
"learning_rate": 8.714e-06,
"loss": 3.7195,
"step": 1290
},
{
"epoch": 0.34,
"learning_rate": 8.709e-06,
"loss": 3.7584,
"step": 1295
},
{
"epoch": 0.34,
"learning_rate": 8.704e-06,
"loss": 3.7889,
"step": 1300
},
{
"epoch": 0.34,
"learning_rate": 8.699000000000001e-06,
"loss": 3.8529,
"step": 1305
},
{
"epoch": 0.34,
"learning_rate": 8.694e-06,
"loss": 3.8166,
"step": 1310
},
{
"epoch": 0.34,
"learning_rate": 8.689000000000001e-06,
"loss": 3.7484,
"step": 1315
},
{
"epoch": 0.35,
"learning_rate": 8.684e-06,
"loss": 3.8014,
"step": 1320
},
{
"epoch": 0.35,
"learning_rate": 8.679000000000001e-06,
"loss": 3.7658,
"step": 1325
},
{
"epoch": 0.35,
"learning_rate": 8.674e-06,
"loss": 3.7834,
"step": 1330
},
{
"epoch": 0.35,
"learning_rate": 8.669000000000001e-06,
"loss": 3.7973,
"step": 1335
},
{
"epoch": 0.35,
"learning_rate": 8.664e-06,
"loss": 3.7607,
"step": 1340
},
{
"epoch": 0.35,
"learning_rate": 8.659000000000001e-06,
"loss": 3.7381,
"step": 1345
},
{
"epoch": 0.35,
"learning_rate": 8.654e-06,
"loss": 3.751,
"step": 1350
},
{
"epoch": 0.36,
"learning_rate": 8.649000000000001e-06,
"loss": 3.7201,
"step": 1355
},
{
"epoch": 0.36,
"learning_rate": 8.644e-06,
"loss": 3.7969,
"step": 1360
},
{
"epoch": 0.36,
"learning_rate": 8.639000000000001e-06,
"loss": 3.7773,
"step": 1365
},
{
"epoch": 0.36,
"learning_rate": 8.634e-06,
"loss": 3.7752,
"step": 1370
},
{
"epoch": 0.36,
"learning_rate": 8.629e-06,
"loss": 3.6992,
"step": 1375
},
{
"epoch": 0.36,
"learning_rate": 8.624e-06,
"loss": 3.651,
"step": 1380
},
{
"epoch": 0.36,
"learning_rate": 8.619e-06,
"loss": 3.7598,
"step": 1385
},
{
"epoch": 0.36,
"learning_rate": 8.614000000000001e-06,
"loss": 3.7367,
"step": 1390
},
{
"epoch": 0.37,
"learning_rate": 8.609e-06,
"loss": 3.6896,
"step": 1395
},
{
"epoch": 0.37,
"learning_rate": 8.604000000000001e-06,
"loss": 3.7732,
"step": 1400
},
{
"epoch": 0.37,
"learning_rate": 8.599e-06,
"loss": 3.7836,
"step": 1405
},
{
"epoch": 0.37,
"learning_rate": 8.594000000000001e-06,
"loss": 3.7854,
"step": 1410
},
{
"epoch": 0.37,
"learning_rate": 8.589e-06,
"loss": 3.701,
"step": 1415
},
{
"epoch": 0.37,
"learning_rate": 8.584000000000001e-06,
"loss": 3.7652,
"step": 1420
},
{
"epoch": 0.37,
"learning_rate": 8.579e-06,
"loss": 3.775,
"step": 1425
},
{
"epoch": 0.37,
"learning_rate": 8.574000000000001e-06,
"loss": 3.7207,
"step": 1430
},
{
"epoch": 0.38,
"learning_rate": 8.569e-06,
"loss": 3.71,
"step": 1435
},
{
"epoch": 0.38,
"learning_rate": 8.564000000000001e-06,
"loss": 3.7359,
"step": 1440
},
{
"epoch": 0.38,
"learning_rate": 8.559e-06,
"loss": 3.6854,
"step": 1445
},
{
"epoch": 0.38,
"learning_rate": 8.554000000000001e-06,
"loss": 3.7342,
"step": 1450
},
{
"epoch": 0.38,
"learning_rate": 8.549e-06,
"loss": 3.6707,
"step": 1455
},
{
"epoch": 0.38,
"learning_rate": 8.544000000000002e-06,
"loss": 3.6596,
"step": 1460
},
{
"epoch": 0.38,
"learning_rate": 8.539e-06,
"loss": 3.6711,
"step": 1465
},
{
"epoch": 0.39,
"learning_rate": 8.534000000000002e-06,
"loss": 3.7279,
"step": 1470
},
{
"epoch": 0.39,
"learning_rate": 8.529e-06,
"loss": 3.7115,
"step": 1475
},
{
"epoch": 0.39,
"learning_rate": 8.524000000000002e-06,
"loss": 3.7139,
"step": 1480
},
{
"epoch": 0.39,
"learning_rate": 8.519000000000001e-06,
"loss": 3.674,
"step": 1485
},
{
"epoch": 0.39,
"learning_rate": 8.514000000000002e-06,
"loss": 3.6191,
"step": 1490
},
{
"epoch": 0.39,
"learning_rate": 8.509000000000001e-06,
"loss": 3.6361,
"step": 1495
},
{
"epoch": 0.39,
"learning_rate": 8.504000000000002e-06,
"loss": 3.7717,
"step": 1500
},
{
"epoch": 0.39,
"learning_rate": 8.499000000000001e-06,
"loss": 3.6355,
"step": 1505
},
{
"epoch": 0.4,
"learning_rate": 8.494e-06,
"loss": 3.8113,
"step": 1510
},
{
"epoch": 0.4,
"learning_rate": 8.489000000000001e-06,
"loss": 3.7465,
"step": 1515
},
{
"epoch": 0.4,
"learning_rate": 8.484e-06,
"loss": 3.8033,
"step": 1520
},
{
"epoch": 0.4,
"learning_rate": 8.479000000000001e-06,
"loss": 3.6867,
"step": 1525
},
{
"epoch": 0.4,
"learning_rate": 8.474e-06,
"loss": 3.7062,
"step": 1530
},
{
"epoch": 0.4,
"learning_rate": 8.469e-06,
"loss": 3.726,
"step": 1535
},
{
"epoch": 0.4,
"learning_rate": 8.464e-06,
"loss": 3.6432,
"step": 1540
},
{
"epoch": 0.4,
"learning_rate": 8.459e-06,
"loss": 3.6943,
"step": 1545
},
{
"epoch": 0.41,
"learning_rate": 8.454e-06,
"loss": 3.6127,
"step": 1550
},
{
"epoch": 0.41,
"learning_rate": 8.449e-06,
"loss": 3.6529,
"step": 1555
},
{
"epoch": 0.41,
"learning_rate": 8.444e-06,
"loss": 3.6063,
"step": 1560
},
{
"epoch": 0.41,
"learning_rate": 8.439e-06,
"loss": 3.7633,
"step": 1565
},
{
"epoch": 0.41,
"learning_rate": 8.434000000000001e-06,
"loss": 3.6211,
"step": 1570
},
{
"epoch": 0.41,
"learning_rate": 8.429e-06,
"loss": 3.6895,
"step": 1575
},
{
"epoch": 0.41,
"learning_rate": 8.424000000000001e-06,
"loss": 3.6152,
"step": 1580
},
{
"epoch": 0.42,
"learning_rate": 8.419e-06,
"loss": 3.6549,
"step": 1585
},
{
"epoch": 0.42,
"learning_rate": 8.414000000000001e-06,
"loss": 3.6502,
"step": 1590
},
{
"epoch": 0.42,
"learning_rate": 8.409e-06,
"loss": 3.5689,
"step": 1595
},
{
"epoch": 0.42,
"learning_rate": 8.404000000000001e-06,
"loss": 3.7002,
"step": 1600
},
{
"epoch": 0.42,
"learning_rate": 8.399e-06,
"loss": 3.5998,
"step": 1605
},
{
"epoch": 0.42,
"learning_rate": 8.394000000000001e-06,
"loss": 3.7164,
"step": 1610
},
{
"epoch": 0.42,
"learning_rate": 8.389e-06,
"loss": 3.6006,
"step": 1615
},
{
"epoch": 0.42,
"learning_rate": 8.384000000000001e-06,
"loss": 3.5586,
"step": 1620
},
{
"epoch": 0.43,
"learning_rate": 8.379e-06,
"loss": 3.6801,
"step": 1625
},
{
"epoch": 0.43,
"learning_rate": 8.374000000000001e-06,
"loss": 3.601,
"step": 1630
},
{
"epoch": 0.43,
"learning_rate": 8.369e-06,
"loss": 3.6344,
"step": 1635
},
{
"epoch": 0.43,
"learning_rate": 8.364000000000002e-06,
"loss": 3.6637,
"step": 1640
},
{
"epoch": 0.43,
"learning_rate": 8.359e-06,
"loss": 3.6357,
"step": 1645
},
{
"epoch": 0.43,
"learning_rate": 8.354000000000002e-06,
"loss": 3.652,
"step": 1650
},
{
"epoch": 0.43,
"learning_rate": 8.349000000000001e-06,
"loss": 3.6439,
"step": 1655
},
{
"epoch": 0.44,
"learning_rate": 8.344000000000002e-06,
"loss": 3.6051,
"step": 1660
},
{
"epoch": 0.44,
"learning_rate": 8.339000000000001e-06,
"loss": 3.6207,
"step": 1665
},
{
"epoch": 0.44,
"learning_rate": 8.334e-06,
"loss": 3.6059,
"step": 1670
},
{
"epoch": 0.44,
"learning_rate": 8.329000000000001e-06,
"loss": 3.7102,
"step": 1675
},
{
"epoch": 0.44,
"learning_rate": 8.324e-06,
"loss": 3.5629,
"step": 1680
},
{
"epoch": 0.44,
"learning_rate": 8.319000000000001e-06,
"loss": 3.6357,
"step": 1685
},
{
"epoch": 0.44,
"learning_rate": 8.314e-06,
"loss": 3.6416,
"step": 1690
},
{
"epoch": 0.44,
"learning_rate": 8.309e-06,
"loss": 3.6572,
"step": 1695
},
{
"epoch": 0.45,
"learning_rate": 8.304e-06,
"loss": 3.6244,
"step": 1700
},
{
"epoch": 0.45,
"learning_rate": 8.299e-06,
"loss": 3.677,
"step": 1705
},
{
"epoch": 0.45,
"learning_rate": 8.294e-06,
"loss": 3.6006,
"step": 1710
},
{
"epoch": 0.45,
"learning_rate": 8.289e-06,
"loss": 3.7182,
"step": 1715
},
{
"epoch": 0.45,
"learning_rate": 8.284e-06,
"loss": 3.6451,
"step": 1720
},
{
"epoch": 0.45,
"learning_rate": 8.279e-06,
"loss": 3.508,
"step": 1725
},
{
"epoch": 0.45,
"learning_rate": 8.274e-06,
"loss": 3.6182,
"step": 1730
},
{
"epoch": 0.45,
"learning_rate": 8.269e-06,
"loss": 3.5447,
"step": 1735
},
{
"epoch": 0.46,
"learning_rate": 8.264e-06,
"loss": 3.5941,
"step": 1740
},
{
"epoch": 0.46,
"learning_rate": 8.259e-06,
"loss": 3.5094,
"step": 1745
},
{
"epoch": 0.46,
"learning_rate": 8.254000000000001e-06,
"loss": 3.5988,
"step": 1750
},
{
"epoch": 0.46,
"learning_rate": 8.249e-06,
"loss": 3.6652,
"step": 1755
},
{
"epoch": 0.46,
"learning_rate": 8.244000000000001e-06,
"loss": 3.5957,
"step": 1760
},
{
"epoch": 0.46,
"learning_rate": 8.239e-06,
"loss": 3.5326,
"step": 1765
},
{
"epoch": 0.46,
"learning_rate": 8.234000000000001e-06,
"loss": 3.5537,
"step": 1770
},
{
"epoch": 0.47,
"learning_rate": 8.229e-06,
"loss": 3.5834,
"step": 1775
},
{
"epoch": 0.47,
"learning_rate": 8.224000000000001e-06,
"loss": 3.5666,
"step": 1780
},
{
"epoch": 0.47,
"learning_rate": 8.219e-06,
"loss": 3.6174,
"step": 1785
},
{
"epoch": 0.47,
"learning_rate": 8.214000000000001e-06,
"loss": 3.5148,
"step": 1790
},
{
"epoch": 0.47,
"learning_rate": 8.209e-06,
"loss": 3.5037,
"step": 1795
},
{
"epoch": 0.47,
"learning_rate": 8.204000000000001e-06,
"loss": 3.6,
"step": 1800
},
{
"epoch": 0.47,
"learning_rate": 8.199e-06,
"loss": 3.5457,
"step": 1805
},
{
"epoch": 0.47,
"learning_rate": 8.194000000000002e-06,
"loss": 3.5021,
"step": 1810
},
{
"epoch": 0.48,
"learning_rate": 8.189e-06,
"loss": 3.509,
"step": 1815
},
{
"epoch": 0.48,
"learning_rate": 8.184000000000002e-06,
"loss": 3.5457,
"step": 1820
},
{
"epoch": 0.48,
"learning_rate": 8.179e-06,
"loss": 3.5449,
"step": 1825
},
{
"epoch": 0.48,
"learning_rate": 8.174e-06,
"loss": 3.5832,
"step": 1830
},
{
"epoch": 0.48,
"learning_rate": 8.169000000000001e-06,
"loss": 3.4852,
"step": 1835
},
{
"epoch": 0.48,
"learning_rate": 8.164e-06,
"loss": 3.6166,
"step": 1840
},
{
"epoch": 0.48,
"learning_rate": 8.159000000000001e-06,
"loss": 3.5248,
"step": 1845
},
{
"epoch": 0.48,
"learning_rate": 8.154e-06,
"loss": 3.5617,
"step": 1850
},
{
"epoch": 0.49,
"learning_rate": 8.149e-06,
"loss": 3.5119,
"step": 1855
},
{
"epoch": 0.49,
"learning_rate": 8.144e-06,
"loss": 3.5475,
"step": 1860
},
{
"epoch": 0.49,
"learning_rate": 8.139e-06,
"loss": 3.5646,
"step": 1865
},
{
"epoch": 0.49,
"learning_rate": 8.134e-06,
"loss": 3.4521,
"step": 1870
},
{
"epoch": 0.49,
"learning_rate": 8.129e-06,
"loss": 3.492,
"step": 1875
},
{
"epoch": 0.49,
"learning_rate": 8.124e-06,
"loss": 3.6187,
"step": 1880
},
{
"epoch": 0.49,
"learning_rate": 8.119e-06,
"loss": 3.4984,
"step": 1885
},
{
"epoch": 0.5,
"learning_rate": 8.114e-06,
"loss": 3.5744,
"step": 1890
},
{
"epoch": 0.5,
"learning_rate": 8.109e-06,
"loss": 3.5514,
"step": 1895
},
{
"epoch": 0.5,
"learning_rate": 8.104e-06,
"loss": 3.4807,
"step": 1900
},
{
"epoch": 0.5,
"learning_rate": 8.099e-06,
"loss": 3.5049,
"step": 1905
},
{
"epoch": 0.5,
"learning_rate": 8.094e-06,
"loss": 3.5098,
"step": 1910
},
{
"epoch": 0.5,
"learning_rate": 8.089e-06,
"loss": 3.4152,
"step": 1915
},
{
"epoch": 0.5,
"learning_rate": 8.084000000000001e-06,
"loss": 3.4281,
"step": 1920
},
{
"epoch": 0.5,
"learning_rate": 8.079e-06,
"loss": 3.5766,
"step": 1925
},
{
"epoch": 0.51,
"learning_rate": 8.074000000000001e-06,
"loss": 3.4908,
"step": 1930
},
{
"epoch": 0.51,
"learning_rate": 8.069e-06,
"loss": 3.5432,
"step": 1935
},
{
"epoch": 0.51,
"learning_rate": 8.064000000000001e-06,
"loss": 3.5154,
"step": 1940
},
{
"epoch": 0.51,
"learning_rate": 8.059e-06,
"loss": 3.4568,
"step": 1945
},
{
"epoch": 0.51,
"learning_rate": 8.054000000000001e-06,
"loss": 3.5314,
"step": 1950
},
{
"epoch": 0.51,
"learning_rate": 8.049e-06,
"loss": 3.5516,
"step": 1955
},
{
"epoch": 0.51,
"learning_rate": 8.044000000000001e-06,
"loss": 3.4271,
"step": 1960
},
{
"epoch": 0.52,
"learning_rate": 8.039e-06,
"loss": 3.4174,
"step": 1965
},
{
"epoch": 0.52,
"learning_rate": 8.034000000000001e-06,
"loss": 3.5492,
"step": 1970
},
{
"epoch": 0.52,
"learning_rate": 8.029e-06,
"loss": 3.568,
"step": 1975
},
{
"epoch": 0.52,
"learning_rate": 8.024000000000001e-06,
"loss": 3.5455,
"step": 1980
},
{
"epoch": 0.52,
"learning_rate": 8.019e-06,
"loss": 3.5598,
"step": 1985
},
{
"epoch": 0.52,
"learning_rate": 8.014e-06,
"loss": 3.5848,
"step": 1990
},
{
"epoch": 0.52,
"learning_rate": 8.009e-06,
"loss": 3.4631,
"step": 1995
},
{
"epoch": 0.52,
"learning_rate": 8.004e-06,
"loss": 3.3873,
"step": 2000
}
],
"max_steps": 10000,
"num_train_epochs": 3,
"total_flos": 1.5936160471711744e+18,
"trial_name": null,
"trial_params": null
}