dolly-japanese-gpt-1b-clone / output / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 18.0,
"global_step": 27360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1e-07,
"loss": 6.0775,
"step": 38
},
{
"epoch": 0.05,
"learning_rate": 1e-07,
"loss": 6.0462,
"step": 76
},
{
"epoch": 0.07,
"learning_rate": 1e-07,
"loss": 6.0014,
"step": 114
},
{
"epoch": 0.1,
"learning_rate": 1e-07,
"loss": 5.9744,
"step": 152
},
{
"epoch": 0.12,
"learning_rate": 1e-07,
"loss": 5.886,
"step": 190
},
{
"epoch": 0.15,
"learning_rate": 1e-07,
"loss": 5.8752,
"step": 228
},
{
"epoch": 0.17,
"learning_rate": 1e-07,
"loss": 5.8586,
"step": 266
},
{
"epoch": 0.2,
"learning_rate": 1e-07,
"loss": 5.7714,
"step": 304
},
{
"epoch": 0.23,
"learning_rate": 1e-07,
"loss": 5.7406,
"step": 342
},
{
"epoch": 0.25,
"learning_rate": 1e-07,
"loss": 5.7096,
"step": 380
},
{
"epoch": 0.28,
"learning_rate": 1e-07,
"loss": 5.6967,
"step": 418
},
{
"epoch": 0.3,
"learning_rate": 1e-07,
"loss": 5.6248,
"step": 456
},
{
"epoch": 0.33,
"learning_rate": 1e-07,
"loss": 5.5632,
"step": 494
},
{
"epoch": 0.35,
"learning_rate": 1e-07,
"loss": 5.5463,
"step": 532
},
{
"epoch": 0.38,
"learning_rate": 1e-07,
"loss": 5.518,
"step": 570
},
{
"epoch": 0.4,
"learning_rate": 1e-07,
"loss": 5.4135,
"step": 608
},
{
"epoch": 0.42,
"learning_rate": 1e-07,
"loss": 5.4107,
"step": 646
},
{
"epoch": 0.45,
"learning_rate": 1e-07,
"loss": 5.3979,
"step": 684
},
{
"epoch": 0.47,
"learning_rate": 1e-07,
"loss": 5.3563,
"step": 722
},
{
"epoch": 0.5,
"learning_rate": 1e-07,
"loss": 5.3835,
"step": 760
},
{
"epoch": 0.53,
"learning_rate": 1e-07,
"loss": 5.3005,
"step": 798
},
{
"epoch": 0.55,
"learning_rate": 1e-07,
"loss": 5.2324,
"step": 836
},
{
"epoch": 0.57,
"learning_rate": 1e-07,
"loss": 5.2447,
"step": 874
},
{
"epoch": 0.6,
"learning_rate": 1e-07,
"loss": 5.1957,
"step": 912
},
{
"epoch": 0.62,
"learning_rate": 1e-07,
"loss": 5.1413,
"step": 950
},
{
"epoch": 0.65,
"learning_rate": 1e-07,
"loss": 5.1094,
"step": 988
},
{
"epoch": 0.68,
"learning_rate": 1e-07,
"loss": 5.0822,
"step": 1026
},
{
"epoch": 0.7,
"learning_rate": 1e-07,
"loss": 5.0826,
"step": 1064
},
{
"epoch": 0.72,
"learning_rate": 1e-07,
"loss": 5.0354,
"step": 1102
},
{
"epoch": 0.75,
"learning_rate": 1e-07,
"loss": 4.9899,
"step": 1140
},
{
"epoch": 0.78,
"learning_rate": 1e-07,
"loss": 5.0207,
"step": 1178
},
{
"epoch": 0.8,
"learning_rate": 1e-07,
"loss": 4.9249,
"step": 1216
},
{
"epoch": 0.82,
"learning_rate": 1e-07,
"loss": 4.9205,
"step": 1254
},
{
"epoch": 0.85,
"learning_rate": 1e-07,
"loss": 4.8866,
"step": 1292
},
{
"epoch": 0.88,
"learning_rate": 1e-07,
"loss": 4.9054,
"step": 1330
},
{
"epoch": 0.9,
"learning_rate": 1e-07,
"loss": 4.9223,
"step": 1368
},
{
"epoch": 0.93,
"learning_rate": 1e-07,
"loss": 4.8158,
"step": 1406
},
{
"epoch": 0.95,
"learning_rate": 1e-07,
"loss": 4.842,
"step": 1444
},
{
"epoch": 0.97,
"learning_rate": 1e-07,
"loss": 4.7924,
"step": 1482
},
{
"epoch": 1.0,
"learning_rate": 1e-07,
"loss": 4.7735,
"step": 1520
},
{
"epoch": 1.02,
"learning_rate": 1e-07,
"loss": 4.803,
"step": 1558
},
{
"epoch": 1.05,
"learning_rate": 1e-07,
"loss": 4.7616,
"step": 1596
},
{
"epoch": 1.07,
"learning_rate": 1e-07,
"loss": 4.7375,
"step": 1634
},
{
"epoch": 1.1,
"learning_rate": 1e-07,
"loss": 4.7046,
"step": 1672
},
{
"epoch": 1.12,
"learning_rate": 1e-07,
"loss": 4.6259,
"step": 1710
},
{
"epoch": 1.15,
"learning_rate": 1e-07,
"loss": 4.6591,
"step": 1748
},
{
"epoch": 1.18,
"learning_rate": 1e-07,
"loss": 4.6459,
"step": 1786
},
{
"epoch": 1.2,
"learning_rate": 1e-07,
"loss": 4.5775,
"step": 1824
},
{
"epoch": 1.23,
"learning_rate": 1e-07,
"loss": 4.5543,
"step": 1862
},
{
"epoch": 1.25,
"learning_rate": 1e-07,
"loss": 4.6039,
"step": 1900
},
{
"epoch": 1.27,
"learning_rate": 1e-07,
"loss": 4.5725,
"step": 1938
},
{
"epoch": 1.3,
"learning_rate": 1e-07,
"loss": 4.55,
"step": 1976
},
{
"epoch": 1.32,
"learning_rate": 1e-07,
"loss": 4.4842,
"step": 2014
},
{
"epoch": 1.35,
"learning_rate": 1e-07,
"loss": 4.4519,
"step": 2052
},
{
"epoch": 1.38,
"learning_rate": 1e-07,
"loss": 4.4428,
"step": 2090
},
{
"epoch": 1.4,
"learning_rate": 1e-07,
"loss": 4.4975,
"step": 2128
},
{
"epoch": 1.43,
"learning_rate": 1e-07,
"loss": 4.4482,
"step": 2166
},
{
"epoch": 1.45,
"learning_rate": 1e-07,
"loss": 4.439,
"step": 2204
},
{
"epoch": 1.48,
"learning_rate": 1e-07,
"loss": 4.4258,
"step": 2242
},
{
"epoch": 1.5,
"learning_rate": 1e-07,
"loss": 4.4161,
"step": 2280
},
{
"epoch": 1.52,
"learning_rate": 1e-07,
"loss": 4.4324,
"step": 2318
},
{
"epoch": 1.55,
"learning_rate": 1e-07,
"loss": 4.3676,
"step": 2356
},
{
"epoch": 1.57,
"learning_rate": 1e-07,
"loss": 4.35,
"step": 2394
},
{
"epoch": 1.6,
"learning_rate": 1e-07,
"loss": 4.3224,
"step": 2432
},
{
"epoch": 1.62,
"learning_rate": 1e-07,
"loss": 4.3432,
"step": 2470
},
{
"epoch": 1.65,
"learning_rate": 1e-07,
"loss": 4.2577,
"step": 2508
},
{
"epoch": 1.68,
"learning_rate": 1e-07,
"loss": 4.385,
"step": 2546
},
{
"epoch": 1.7,
"learning_rate": 1e-07,
"loss": 4.2489,
"step": 2584
},
{
"epoch": 1.73,
"learning_rate": 1e-07,
"loss": 4.2986,
"step": 2622
},
{
"epoch": 1.75,
"learning_rate": 1e-07,
"loss": 4.2423,
"step": 2660
},
{
"epoch": 1.77,
"learning_rate": 1e-07,
"loss": 4.2712,
"step": 2698
},
{
"epoch": 1.8,
"learning_rate": 1e-07,
"loss": 4.2693,
"step": 2736
},
{
"epoch": 1.82,
"learning_rate": 1e-07,
"loss": 4.1746,
"step": 2774
},
{
"epoch": 1.85,
"learning_rate": 1e-07,
"loss": 4.1345,
"step": 2812
},
{
"epoch": 1.88,
"learning_rate": 1e-07,
"loss": 4.0853,
"step": 2850
},
{
"epoch": 1.9,
"learning_rate": 1e-07,
"loss": 4.1348,
"step": 2888
},
{
"epoch": 1.93,
"learning_rate": 1e-07,
"loss": 4.1303,
"step": 2926
},
{
"epoch": 1.95,
"learning_rate": 1e-07,
"loss": 4.0622,
"step": 2964
},
{
"epoch": 1.98,
"learning_rate": 1e-07,
"loss": 4.0644,
"step": 3002
},
{
"epoch": 2.0,
"learning_rate": 1e-07,
"loss": 4.0578,
"step": 3040
},
{
"epoch": 2.02,
"learning_rate": 1e-07,
"loss": 3.9785,
"step": 3078
},
{
"epoch": 2.05,
"learning_rate": 1e-07,
"loss": 4.0323,
"step": 3116
},
{
"epoch": 2.08,
"learning_rate": 1e-07,
"loss": 4.0035,
"step": 3154
},
{
"epoch": 2.1,
"learning_rate": 1e-07,
"loss": 3.9697,
"step": 3192
},
{
"epoch": 2.12,
"learning_rate": 1e-07,
"loss": 3.9225,
"step": 3230
},
{
"epoch": 2.15,
"learning_rate": 1e-07,
"loss": 3.9822,
"step": 3268
},
{
"epoch": 2.17,
"learning_rate": 1e-07,
"loss": 3.9636,
"step": 3306
},
{
"epoch": 2.2,
"learning_rate": 1e-07,
"loss": 3.9574,
"step": 3344
},
{
"epoch": 2.23,
"learning_rate": 1e-07,
"loss": 3.8573,
"step": 3382
},
{
"epoch": 2.25,
"learning_rate": 1e-07,
"loss": 4.0107,
"step": 3420
},
{
"epoch": 2.27,
"learning_rate": 1e-07,
"loss": 3.8145,
"step": 3458
},
{
"epoch": 2.3,
"learning_rate": 1e-07,
"loss": 3.8424,
"step": 3496
},
{
"epoch": 2.33,
"learning_rate": 1e-07,
"loss": 3.8641,
"step": 3534
},
{
"epoch": 2.35,
"learning_rate": 1e-07,
"loss": 3.8606,
"step": 3572
},
{
"epoch": 2.38,
"learning_rate": 1e-07,
"loss": 3.7903,
"step": 3610
},
{
"epoch": 2.4,
"learning_rate": 1e-07,
"loss": 3.7378,
"step": 3648
},
{
"epoch": 2.42,
"learning_rate": 1e-07,
"loss": 3.7713,
"step": 3686
},
{
"epoch": 2.45,
"learning_rate": 1e-07,
"loss": 3.834,
"step": 3724
},
{
"epoch": 2.48,
"learning_rate": 1e-07,
"loss": 3.7785,
"step": 3762
},
{
"epoch": 2.5,
"learning_rate": 1e-07,
"loss": 3.8277,
"step": 3800
},
{
"epoch": 2.52,
"learning_rate": 1e-07,
"loss": 3.7287,
"step": 3838
},
{
"epoch": 2.55,
"learning_rate": 1e-07,
"loss": 3.7123,
"step": 3876
},
{
"epoch": 2.58,
"learning_rate": 1e-07,
"loss": 3.7185,
"step": 3914
},
{
"epoch": 2.6,
"learning_rate": 1e-07,
"loss": 3.6936,
"step": 3952
},
{
"epoch": 2.62,
"learning_rate": 1e-07,
"loss": 3.7462,
"step": 3990
},
{
"epoch": 2.65,
"learning_rate": 1e-07,
"loss": 3.6844,
"step": 4028
},
{
"epoch": 2.67,
"learning_rate": 1e-07,
"loss": 3.6709,
"step": 4066
},
{
"epoch": 2.7,
"learning_rate": 1e-07,
"loss": 3.6508,
"step": 4104
},
{
"epoch": 2.73,
"learning_rate": 1e-07,
"loss": 3.6247,
"step": 4142
},
{
"epoch": 2.75,
"learning_rate": 1e-07,
"loss": 3.6459,
"step": 4180
},
{
"epoch": 2.77,
"learning_rate": 1e-07,
"loss": 3.6169,
"step": 4218
},
{
"epoch": 2.8,
"learning_rate": 1e-07,
"loss": 3.6944,
"step": 4256
},
{
"epoch": 2.83,
"learning_rate": 1e-07,
"loss": 3.6458,
"step": 4294
},
{
"epoch": 2.85,
"learning_rate": 1e-07,
"loss": 3.5598,
"step": 4332
},
{
"epoch": 2.88,
"learning_rate": 1e-07,
"loss": 3.5221,
"step": 4370
},
{
"epoch": 2.9,
"learning_rate": 1e-07,
"loss": 3.6462,
"step": 4408
},
{
"epoch": 2.92,
"learning_rate": 1e-07,
"loss": 3.6155,
"step": 4446
},
{
"epoch": 2.95,
"learning_rate": 1e-07,
"loss": 3.5649,
"step": 4484
},
{
"epoch": 2.98,
"learning_rate": 1e-07,
"loss": 3.5088,
"step": 4522
},
{
"epoch": 3.0,
"learning_rate": 1e-07,
"loss": 3.6479,
"step": 4560
},
{
"epoch": 3.02,
"learning_rate": 1e-07,
"loss": 3.5495,
"step": 4598
},
{
"epoch": 3.05,
"learning_rate": 1e-07,
"loss": 3.4842,
"step": 4636
},
{
"epoch": 3.08,
"learning_rate": 1e-07,
"loss": 3.6004,
"step": 4674
},
{
"epoch": 3.1,
"learning_rate": 1e-07,
"loss": 3.4871,
"step": 4712
},
{
"epoch": 3.12,
"learning_rate": 1e-07,
"loss": 3.5237,
"step": 4750
},
{
"epoch": 3.15,
"learning_rate": 1e-07,
"loss": 3.5153,
"step": 4788
},
{
"epoch": 3.17,
"learning_rate": 1e-07,
"loss": 3.4592,
"step": 4826
},
{
"epoch": 3.2,
"learning_rate": 1e-07,
"loss": 3.5256,
"step": 4864
},
{
"epoch": 3.23,
"learning_rate": 1e-07,
"loss": 3.4191,
"step": 4902
},
{
"epoch": 3.25,
"learning_rate": 1e-07,
"loss": 3.4433,
"step": 4940
},
{
"epoch": 3.27,
"learning_rate": 1e-07,
"loss": 3.51,
"step": 4978
},
{
"epoch": 3.3,
"learning_rate": 1e-07,
"loss": 3.4299,
"step": 5016
},
{
"epoch": 3.33,
"learning_rate": 1e-07,
"loss": 3.4375,
"step": 5054
},
{
"epoch": 3.35,
"learning_rate": 1e-07,
"loss": 3.4766,
"step": 5092
},
{
"epoch": 3.38,
"learning_rate": 1e-07,
"loss": 3.4243,
"step": 5130
},
{
"epoch": 3.4,
"learning_rate": 1e-07,
"loss": 3.4521,
"step": 5168
},
{
"epoch": 3.42,
"learning_rate": 1e-07,
"loss": 3.4048,
"step": 5206
},
{
"epoch": 3.45,
"learning_rate": 1e-07,
"loss": 3.4903,
"step": 5244
},
{
"epoch": 3.48,
"learning_rate": 1e-07,
"loss": 3.3147,
"step": 5282
},
{
"epoch": 3.5,
"learning_rate": 1e-07,
"loss": 3.4223,
"step": 5320
},
{
"epoch": 3.52,
"learning_rate": 1e-07,
"loss": 3.3592,
"step": 5358
},
{
"epoch": 3.55,
"learning_rate": 1e-07,
"loss": 3.4287,
"step": 5396
},
{
"epoch": 3.58,
"learning_rate": 1e-07,
"loss": 3.2888,
"step": 5434
},
{
"epoch": 3.6,
"learning_rate": 1e-07,
"loss": 3.352,
"step": 5472
},
{
"epoch": 3.62,
"learning_rate": 1e-07,
"loss": 3.352,
"step": 5510
},
{
"epoch": 3.65,
"learning_rate": 1e-07,
"loss": 3.4211,
"step": 5548
},
{
"epoch": 3.67,
"learning_rate": 1e-07,
"loss": 3.4113,
"step": 5586
},
{
"epoch": 3.7,
"learning_rate": 1e-07,
"loss": 3.2818,
"step": 5624
},
{
"epoch": 3.73,
"learning_rate": 1e-07,
"loss": 3.368,
"step": 5662
},
{
"epoch": 3.75,
"learning_rate": 1e-07,
"loss": 3.3881,
"step": 5700
},
{
"epoch": 3.77,
"learning_rate": 1e-07,
"loss": 3.3362,
"step": 5738
},
{
"epoch": 3.8,
"learning_rate": 1e-07,
"loss": 3.3529,
"step": 5776
},
{
"epoch": 3.83,
"learning_rate": 1e-07,
"loss": 3.2348,
"step": 5814
},
{
"epoch": 3.85,
"learning_rate": 1e-07,
"loss": 3.3256,
"step": 5852
},
{
"epoch": 3.88,
"learning_rate": 1e-07,
"loss": 3.2392,
"step": 5890
},
{
"epoch": 3.9,
"learning_rate": 1e-07,
"loss": 3.2072,
"step": 5928
},
{
"epoch": 3.92,
"learning_rate": 1e-07,
"loss": 3.2969,
"step": 5966
},
{
"epoch": 3.95,
"learning_rate": 1e-07,
"loss": 3.2984,
"step": 6004
},
{
"epoch": 3.98,
"learning_rate": 1e-07,
"loss": 3.2254,
"step": 6042
},
{
"epoch": 4.0,
"learning_rate": 1e-07,
"loss": 3.3567,
"step": 6080
},
{
"epoch": 4.03,
"learning_rate": 1e-07,
"loss": 3.2742,
"step": 6118
},
{
"epoch": 4.05,
"learning_rate": 1e-07,
"loss": 3.3057,
"step": 6156
},
{
"epoch": 4.08,
"learning_rate": 1e-07,
"loss": 3.1336,
"step": 6194
},
{
"epoch": 4.1,
"learning_rate": 1e-07,
"loss": 3.2602,
"step": 6232
},
{
"epoch": 4.12,
"learning_rate": 1e-07,
"loss": 3.3183,
"step": 6270
},
{
"epoch": 4.15,
"learning_rate": 1e-07,
"loss": 3.2419,
"step": 6308
},
{
"epoch": 4.17,
"learning_rate": 1e-07,
"loss": 3.2496,
"step": 6346
},
{
"epoch": 4.2,
"learning_rate": 1e-07,
"loss": 3.2705,
"step": 6384
},
{
"epoch": 4.22,
"learning_rate": 1e-07,
"loss": 3.1646,
"step": 6422
},
{
"epoch": 4.25,
"learning_rate": 1e-07,
"loss": 3.1935,
"step": 6460
},
{
"epoch": 4.28,
"learning_rate": 1e-07,
"loss": 3.2376,
"step": 6498
},
{
"epoch": 4.3,
"learning_rate": 1e-07,
"loss": 3.1907,
"step": 6536
},
{
"epoch": 4.33,
"learning_rate": 1e-07,
"loss": 3.1895,
"step": 6574
},
{
"epoch": 4.35,
"learning_rate": 1e-07,
"loss": 3.2795,
"step": 6612
},
{
"epoch": 4.38,
"learning_rate": 1e-07,
"loss": 3.1587,
"step": 6650
},
{
"epoch": 4.4,
"learning_rate": 1e-07,
"loss": 3.2195,
"step": 6688
},
{
"epoch": 4.42,
"learning_rate": 1e-07,
"loss": 3.2117,
"step": 6726
},
{
"epoch": 4.45,
"learning_rate": 1e-07,
"loss": 3.1271,
"step": 6764
},
{
"epoch": 4.47,
"learning_rate": 1e-07,
"loss": 3.1652,
"step": 6802
},
{
"epoch": 4.5,
"learning_rate": 1e-07,
"loss": 3.1776,
"step": 6840
},
{
"epoch": 4.53,
"learning_rate": 1e-07,
"loss": 3.0865,
"step": 6878
},
{
"epoch": 4.55,
"learning_rate": 1e-07,
"loss": 3.1704,
"step": 6916
},
{
"epoch": 4.58,
"learning_rate": 1e-07,
"loss": 3.1358,
"step": 6954
},
{
"epoch": 4.6,
"learning_rate": 1e-07,
"loss": 3.1571,
"step": 6992
},
{
"epoch": 4.62,
"learning_rate": 1e-07,
"loss": 3.1921,
"step": 7030
},
{
"epoch": 4.65,
"learning_rate": 1e-07,
"loss": 3.1816,
"step": 7068
},
{
"epoch": 4.67,
"learning_rate": 1e-07,
"loss": 3.0454,
"step": 7106
},
{
"epoch": 4.7,
"learning_rate": 1e-07,
"loss": 3.0481,
"step": 7144
},
{
"epoch": 4.72,
"learning_rate": 1e-07,
"loss": 3.1092,
"step": 7182
},
{
"epoch": 4.75,
"learning_rate": 1e-07,
"loss": 3.1149,
"step": 7220
},
{
"epoch": 4.78,
"learning_rate": 1e-07,
"loss": 3.1654,
"step": 7258
},
{
"epoch": 4.8,
"learning_rate": 1e-07,
"loss": 3.1102,
"step": 7296
},
{
"epoch": 4.83,
"learning_rate": 1e-07,
"loss": 3.1573,
"step": 7334
},
{
"epoch": 4.85,
"learning_rate": 1e-07,
"loss": 3.0639,
"step": 7372
},
{
"epoch": 4.88,
"learning_rate": 1e-07,
"loss": 3.0567,
"step": 7410
},
{
"epoch": 4.9,
"learning_rate": 1e-07,
"loss": 3.0979,
"step": 7448
},
{
"epoch": 4.92,
"learning_rate": 1e-07,
"loss": 3.0217,
"step": 7486
},
{
"epoch": 4.95,
"learning_rate": 1e-07,
"loss": 3.0613,
"step": 7524
},
{
"epoch": 4.97,
"learning_rate": 1e-07,
"loss": 3.0275,
"step": 7562
},
{
"epoch": 5.0,
"learning_rate": 1e-07,
"loss": 3.0743,
"step": 7600
},
{
"epoch": 5.03,
"learning_rate": 1e-07,
"loss": 3.0545,
"step": 7638
},
{
"epoch": 5.05,
"learning_rate": 1e-07,
"loss": 3.0066,
"step": 7676
},
{
"epoch": 5.08,
"learning_rate": 1e-07,
"loss": 3.0473,
"step": 7714
},
{
"epoch": 5.1,
"learning_rate": 1e-07,
"loss": 3.0846,
"step": 7752
},
{
"epoch": 5.12,
"learning_rate": 1e-07,
"loss": 3.1315,
"step": 7790
},
{
"epoch": 5.15,
"learning_rate": 1e-07,
"loss": 2.9579,
"step": 7828
},
{
"epoch": 5.17,
"learning_rate": 1e-07,
"loss": 3.0408,
"step": 7866
},
{
"epoch": 5.2,
"learning_rate": 1e-07,
"loss": 3.0525,
"step": 7904
},
{
"epoch": 5.22,
"learning_rate": 1e-07,
"loss": 3.0084,
"step": 7942
},
{
"epoch": 5.25,
"learning_rate": 1e-07,
"loss": 3.0704,
"step": 7980
},
{
"epoch": 5.28,
"learning_rate": 1e-07,
"loss": 3.0312,
"step": 8018
},
{
"epoch": 5.3,
"learning_rate": 1e-07,
"loss": 2.9437,
"step": 8056
},
{
"epoch": 5.33,
"learning_rate": 1e-07,
"loss": 3.0565,
"step": 8094
},
{
"epoch": 5.35,
"learning_rate": 1e-07,
"loss": 2.9435,
"step": 8132
},
{
"epoch": 5.38,
"learning_rate": 1e-07,
"loss": 2.9414,
"step": 8170
},
{
"epoch": 5.4,
"learning_rate": 1e-07,
"loss": 3.0381,
"step": 8208
},
{
"epoch": 5.42,
"learning_rate": 1e-07,
"loss": 3.0162,
"step": 8246
},
{
"epoch": 5.45,
"learning_rate": 1e-07,
"loss": 3.0205,
"step": 8284
},
{
"epoch": 5.47,
"learning_rate": 1e-07,
"loss": 2.9968,
"step": 8322
},
{
"epoch": 5.5,
"learning_rate": 1e-07,
"loss": 3.0021,
"step": 8360
},
{
"epoch": 5.53,
"learning_rate": 1e-07,
"loss": 2.9997,
"step": 8398
},
{
"epoch": 5.55,
"learning_rate": 1e-07,
"loss": 3.0112,
"step": 8436
},
{
"epoch": 5.58,
"learning_rate": 1e-07,
"loss": 3.0385,
"step": 8474
},
{
"epoch": 5.6,
"learning_rate": 1e-07,
"loss": 2.9613,
"step": 8512
},
{
"epoch": 5.62,
"learning_rate": 1e-07,
"loss": 2.9484,
"step": 8550
},
{
"epoch": 5.65,
"learning_rate": 1e-07,
"loss": 2.979,
"step": 8588
},
{
"epoch": 5.67,
"learning_rate": 1e-07,
"loss": 2.9796,
"step": 8626
},
{
"epoch": 5.7,
"learning_rate": 1e-07,
"loss": 2.9801,
"step": 8664
},
{
"epoch": 5.72,
"learning_rate": 1e-07,
"loss": 3.0399,
"step": 8702
},
{
"epoch": 5.75,
"learning_rate": 1e-07,
"loss": 2.9223,
"step": 8740
},
{
"epoch": 5.78,
"learning_rate": 1e-07,
"loss": 3.0202,
"step": 8778
},
{
"epoch": 5.8,
"learning_rate": 1e-07,
"loss": 2.9057,
"step": 8816
},
{
"epoch": 5.83,
"learning_rate": 1e-07,
"loss": 2.9556,
"step": 8854
},
{
"epoch": 5.85,
"learning_rate": 1e-07,
"loss": 2.9582,
"step": 8892
},
{
"epoch": 5.88,
"learning_rate": 1e-07,
"loss": 2.8448,
"step": 8930
},
{
"epoch": 5.9,
"learning_rate": 1e-07,
"loss": 3.0643,
"step": 8968
},
{
"epoch": 5.92,
"learning_rate": 1e-07,
"loss": 2.8477,
"step": 9006
},
{
"epoch": 5.95,
"learning_rate": 1e-07,
"loss": 2.9684,
"step": 9044
},
{
"epoch": 5.97,
"learning_rate": 1e-07,
"loss": 2.9086,
"step": 9082
},
{
"epoch": 6.0,
"learning_rate": 1e-07,
"loss": 2.8426,
"step": 9120
},
{
"epoch": 6.03,
"learning_rate": 1e-07,
"loss": 2.903,
"step": 9158
},
{
"epoch": 6.05,
"learning_rate": 1e-07,
"loss": 2.8398,
"step": 9196
},
{
"epoch": 6.08,
"learning_rate": 1e-07,
"loss": 2.8839,
"step": 9234
},
{
"epoch": 6.1,
"learning_rate": 1e-07,
"loss": 2.9396,
"step": 9272
},
{
"epoch": 6.12,
"learning_rate": 1e-07,
"loss": 2.87,
"step": 9310
},
{
"epoch": 6.15,
"learning_rate": 1e-07,
"loss": 2.8619,
"step": 9348
},
{
"epoch": 6.17,
"learning_rate": 1e-07,
"loss": 2.8699,
"step": 9386
},
{
"epoch": 6.2,
"learning_rate": 1e-07,
"loss": 2.9366,
"step": 9424
},
{
"epoch": 6.22,
"learning_rate": 1e-07,
"loss": 2.895,
"step": 9462
},
{
"epoch": 6.25,
"learning_rate": 1e-07,
"loss": 2.8928,
"step": 9500
},
{
"epoch": 6.28,
"learning_rate": 1e-07,
"loss": 2.889,
"step": 9538
},
{
"epoch": 6.3,
"learning_rate": 1e-07,
"loss": 2.9291,
"step": 9576
},
{
"epoch": 6.33,
"learning_rate": 1e-07,
"loss": 2.8722,
"step": 9614
},
{
"epoch": 6.35,
"learning_rate": 1e-07,
"loss": 2.8362,
"step": 9652
},
{
"epoch": 6.38,
"learning_rate": 1e-07,
"loss": 2.8519,
"step": 9690
},
{
"epoch": 6.4,
"learning_rate": 1e-07,
"loss": 2.8364,
"step": 9728
},
{
"epoch": 6.42,
"learning_rate": 1e-07,
"loss": 2.9237,
"step": 9766
},
{
"epoch": 6.45,
"learning_rate": 1e-07,
"loss": 2.876,
"step": 9804
},
{
"epoch": 6.47,
"learning_rate": 1e-07,
"loss": 2.8436,
"step": 9842
},
{
"epoch": 6.5,
"learning_rate": 1e-07,
"loss": 2.8657,
"step": 9880
},
{
"epoch": 6.53,
"learning_rate": 1e-07,
"loss": 2.8283,
"step": 9918
},
{
"epoch": 6.55,
"learning_rate": 1e-07,
"loss": 2.8939,
"step": 9956
},
{
"epoch": 6.58,
"learning_rate": 1e-07,
"loss": 2.8956,
"step": 9994
},
{
"epoch": 6.6,
"learning_rate": 1e-07,
"loss": 2.8361,
"step": 10032
},
{
"epoch": 6.62,
"learning_rate": 1e-07,
"loss": 2.8309,
"step": 10070
},
{
"epoch": 6.65,
"learning_rate": 1e-07,
"loss": 2.8439,
"step": 10108
},
{
"epoch": 6.67,
"learning_rate": 1e-07,
"loss": 2.9308,
"step": 10146
},
{
"epoch": 6.7,
"learning_rate": 1e-07,
"loss": 2.8025,
"step": 10184
},
{
"epoch": 6.72,
"learning_rate": 1e-07,
"loss": 2.8796,
"step": 10222
},
{
"epoch": 6.75,
"learning_rate": 1e-07,
"loss": 2.8225,
"step": 10260
},
{
"epoch": 6.78,
"learning_rate": 1e-07,
"loss": 2.9773,
"step": 10298
},
{
"epoch": 6.8,
"learning_rate": 1e-07,
"loss": 2.8718,
"step": 10336
},
{
"epoch": 6.83,
"learning_rate": 1e-07,
"loss": 2.8718,
"step": 10374
},
{
"epoch": 6.85,
"learning_rate": 1e-07,
"loss": 2.8839,
"step": 10412
},
{
"epoch": 6.88,
"learning_rate": 1e-07,
"loss": 2.8241,
"step": 10450
},
{
"epoch": 6.9,
"learning_rate": 1e-07,
"loss": 2.811,
"step": 10488
},
{
"epoch": 6.92,
"learning_rate": 1e-07,
"loss": 2.8386,
"step": 10526
},
{
"epoch": 6.95,
"learning_rate": 1e-07,
"loss": 2.8042,
"step": 10564
},
{
"epoch": 6.97,
"learning_rate": 1e-07,
"loss": 2.7817,
"step": 10602
},
{
"epoch": 7.0,
"learning_rate": 1e-07,
"loss": 2.776,
"step": 10640
},
{
"epoch": 7.03,
"learning_rate": 1e-07,
"loss": 2.8606,
"step": 10678
},
{
"epoch": 7.05,
"learning_rate": 1e-07,
"loss": 2.8196,
"step": 10716
},
{
"epoch": 7.08,
"learning_rate": 1e-07,
"loss": 2.9032,
"step": 10754
},
{
"epoch": 7.1,
"learning_rate": 1e-07,
"loss": 2.8276,
"step": 10792
},
{
"epoch": 7.12,
"learning_rate": 1e-07,
"loss": 2.7629,
"step": 10830
},
{
"epoch": 7.15,
"learning_rate": 1e-07,
"loss": 2.7876,
"step": 10868
},
{
"epoch": 7.17,
"learning_rate": 1e-07,
"loss": 2.8237,
"step": 10906
},
{
"epoch": 7.2,
"learning_rate": 1e-07,
"loss": 2.8782,
"step": 10944
},
{
"epoch": 7.22,
"learning_rate": 1e-07,
"loss": 2.7644,
"step": 10982
},
{
"epoch": 7.25,
"learning_rate": 1e-07,
"loss": 2.7422,
"step": 11020
},
{
"epoch": 7.28,
"learning_rate": 1e-07,
"loss": 2.8169,
"step": 11058
},
{
"epoch": 7.3,
"learning_rate": 1e-07,
"loss": 2.8212,
"step": 11096
},
{
"epoch": 7.33,
"learning_rate": 1e-07,
"loss": 2.6941,
"step": 11134
},
{
"epoch": 7.35,
"learning_rate": 1e-07,
"loss": 2.8517,
"step": 11172
},
{
"epoch": 7.38,
"learning_rate": 1e-07,
"loss": 2.8385,
"step": 11210
},
{
"epoch": 7.4,
"learning_rate": 1e-07,
"loss": 2.755,
"step": 11248
},
{
"epoch": 7.42,
"learning_rate": 1e-07,
"loss": 2.7692,
"step": 11286
},
{
"epoch": 7.45,
"learning_rate": 1e-07,
"loss": 2.8242,
"step": 11324
},
{
"epoch": 7.47,
"learning_rate": 1e-07,
"loss": 2.7609,
"step": 11362
},
{
"epoch": 7.5,
"learning_rate": 1e-07,
"loss": 2.7633,
"step": 11400
},
{
"epoch": 7.53,
"learning_rate": 1e-07,
"loss": 2.7764,
"step": 11438
},
{
"epoch": 7.55,
"learning_rate": 1e-07,
"loss": 2.7956,
"step": 11476
},
{
"epoch": 7.58,
"learning_rate": 1e-07,
"loss": 2.7179,
"step": 11514
},
{
"epoch": 7.6,
"learning_rate": 1e-07,
"loss": 2.7766,
"step": 11552
},
{
"epoch": 7.62,
"learning_rate": 1e-07,
"loss": 2.8152,
"step": 11590
},
{
"epoch": 7.65,
"learning_rate": 1e-07,
"loss": 2.7367,
"step": 11628
},
{
"epoch": 7.67,
"learning_rate": 1e-07,
"loss": 2.7899,
"step": 11666
},
{
"epoch": 7.7,
"learning_rate": 1e-07,
"loss": 2.8211,
"step": 11704
},
{
"epoch": 7.72,
"learning_rate": 1e-07,
"loss": 2.7512,
"step": 11742
},
{
"epoch": 7.75,
"learning_rate": 1e-07,
"loss": 2.7689,
"step": 11780
},
{
"epoch": 7.78,
"learning_rate": 1e-07,
"loss": 2.7889,
"step": 11818
},
{
"epoch": 7.8,
"learning_rate": 1e-07,
"loss": 2.7053,
"step": 11856
},
{
"epoch": 7.83,
"learning_rate": 1e-07,
"loss": 2.7996,
"step": 11894
},
{
"epoch": 7.85,
"learning_rate": 1e-07,
"loss": 2.6374,
"step": 11932
},
{
"epoch": 7.88,
"learning_rate": 1e-07,
"loss": 2.7144,
"step": 11970
},
{
"epoch": 7.9,
"learning_rate": 1e-07,
"loss": 2.6325,
"step": 12008
},
{
"epoch": 7.92,
"learning_rate": 1e-07,
"loss": 2.7357,
"step": 12046
},
{
"epoch": 7.95,
"learning_rate": 1e-07,
"loss": 2.7822,
"step": 12084
},
{
"epoch": 7.97,
"learning_rate": 1e-07,
"loss": 2.7798,
"step": 12122
},
{
"epoch": 8.0,
"learning_rate": 1e-07,
"loss": 2.7708,
"step": 12160
},
{
"epoch": 8.03,
"learning_rate": 1e-07,
"loss": 2.7695,
"step": 12198
},
{
"epoch": 8.05,
"learning_rate": 1e-07,
"loss": 2.7509,
"step": 12236
},
{
"epoch": 8.07,
"learning_rate": 1e-07,
"loss": 2.6471,
"step": 12274
},
{
"epoch": 8.1,
"learning_rate": 1e-07,
"loss": 2.6833,
"step": 12312
},
{
"epoch": 8.12,
"learning_rate": 1e-07,
"loss": 2.737,
"step": 12350
},
{
"epoch": 8.15,
"learning_rate": 1e-07,
"loss": 2.7074,
"step": 12388
},
{
"epoch": 8.18,
"learning_rate": 1e-07,
"loss": 2.7347,
"step": 12426
},
{
"epoch": 8.2,
"learning_rate": 1e-07,
"loss": 2.6959,
"step": 12464
},
{
"epoch": 8.22,
"learning_rate": 1e-07,
"loss": 2.6782,
"step": 12502
},
{
"epoch": 8.25,
"learning_rate": 1e-07,
"loss": 2.727,
"step": 12540
},
{
"epoch": 8.28,
"learning_rate": 1e-07,
"loss": 2.6834,
"step": 12578
},
{
"epoch": 8.3,
"learning_rate": 1e-07,
"loss": 2.7919,
"step": 12616
},
{
"epoch": 8.32,
"learning_rate": 1e-07,
"loss": 2.7861,
"step": 12654
},
{
"epoch": 8.35,
"learning_rate": 1e-07,
"loss": 2.6642,
"step": 12692
},
{
"epoch": 8.38,
"learning_rate": 1e-07,
"loss": 2.7843,
"step": 12730
},
{
"epoch": 8.4,
"learning_rate": 1e-07,
"loss": 2.7456,
"step": 12768
},
{
"epoch": 8.43,
"learning_rate": 1e-07,
"loss": 2.749,
"step": 12806
},
{
"epoch": 8.45,
"learning_rate": 1e-07,
"loss": 2.6919,
"step": 12844
},
{
"epoch": 8.47,
"learning_rate": 1e-07,
"loss": 2.7122,
"step": 12882
},
{
"epoch": 8.5,
"learning_rate": 1e-07,
"loss": 2.6637,
"step": 12920
},
{
"epoch": 8.53,
"learning_rate": 1e-07,
"loss": 2.7101,
"step": 12958
},
{
"epoch": 8.55,
"learning_rate": 1e-07,
"loss": 2.716,
"step": 12996
},
{
"epoch": 8.57,
"learning_rate": 1e-07,
"loss": 2.698,
"step": 13034
},
{
"epoch": 8.6,
"learning_rate": 1e-07,
"loss": 2.7127,
"step": 13072
},
{
"epoch": 8.62,
"learning_rate": 1e-07,
"loss": 2.7368,
"step": 13110
},
{
"epoch": 8.65,
"learning_rate": 1e-07,
"loss": 2.6313,
"step": 13148
},
{
"epoch": 8.68,
"learning_rate": 1e-07,
"loss": 2.7304,
"step": 13186
},
{
"epoch": 8.7,
"learning_rate": 1e-07,
"loss": 2.7396,
"step": 13224
},
{
"epoch": 8.72,
"learning_rate": 1e-07,
"loss": 2.6746,
"step": 13262
},
{
"epoch": 8.75,
"learning_rate": 1e-07,
"loss": 2.6744,
"step": 13300
},
{
"epoch": 8.78,
"learning_rate": 1e-07,
"loss": 2.6228,
"step": 13338
},
{
"epoch": 8.8,
"learning_rate": 1e-07,
"loss": 2.7504,
"step": 13376
},
{
"epoch": 8.82,
"learning_rate": 1e-07,
"loss": 2.7281,
"step": 13414
},
{
"epoch": 8.85,
"learning_rate": 1e-07,
"loss": 2.7886,
"step": 13452
},
{
"epoch": 8.88,
"learning_rate": 1e-07,
"loss": 2.7505,
"step": 13490
},
{
"epoch": 8.9,
"learning_rate": 1e-07,
"loss": 2.7029,
"step": 13528
},
{
"epoch": 8.93,
"learning_rate": 1e-07,
"loss": 2.7687,
"step": 13566
},
{
"epoch": 8.95,
"learning_rate": 1e-07,
"loss": 2.6783,
"step": 13604
},
{
"epoch": 8.97,
"learning_rate": 1e-07,
"loss": 2.6507,
"step": 13642
},
{
"epoch": 9.0,
"learning_rate": 1e-07,
"loss": 2.673,
"step": 13680
},
{
"epoch": 9.03,
"learning_rate": 1e-07,
"loss": 2.6298,
"step": 13718
},
{
"epoch": 9.05,
"learning_rate": 1e-07,
"loss": 2.6612,
"step": 13756
},
{
"epoch": 9.07,
"learning_rate": 1e-07,
"loss": 2.7233,
"step": 13794
},
{
"epoch": 9.1,
"learning_rate": 1e-07,
"loss": 2.6386,
"step": 13832
},
{
"epoch": 9.12,
"learning_rate": 1e-07,
"loss": 2.6991,
"step": 13870
},
{
"epoch": 9.15,
"learning_rate": 1e-07,
"loss": 2.6629,
"step": 13908
},
{
"epoch": 9.18,
"learning_rate": 1e-07,
"loss": 2.6775,
"step": 13946
},
{
"epoch": 9.2,
"learning_rate": 1e-07,
"loss": 2.6804,
"step": 13984
},
{
"epoch": 9.22,
"learning_rate": 1e-07,
"loss": 2.6196,
"step": 14022
},
{
"epoch": 9.25,
"learning_rate": 1e-07,
"loss": 2.6489,
"step": 14060
},
{
"epoch": 9.28,
"learning_rate": 1e-07,
"loss": 2.7262,
"step": 14098
},
{
"epoch": 9.3,
"learning_rate": 1e-07,
"loss": 2.6845,
"step": 14136
},
{
"epoch": 9.32,
"learning_rate": 1e-07,
"loss": 2.6479,
"step": 14174
},
{
"epoch": 9.35,
"learning_rate": 1e-07,
"loss": 2.7273,
"step": 14212
},
{
"epoch": 9.38,
"learning_rate": 1e-07,
"loss": 2.6825,
"step": 14250
},
{
"epoch": 9.4,
"learning_rate": 1e-07,
"loss": 2.6207,
"step": 14288
},
{
"epoch": 9.43,
"learning_rate": 1e-07,
"loss": 2.6727,
"step": 14326
},
{
"epoch": 9.45,
"learning_rate": 1e-07,
"loss": 2.6411,
"step": 14364
},
{
"epoch": 9.47,
"learning_rate": 1e-07,
"loss": 2.7265,
"step": 14402
},
{
"epoch": 9.5,
"learning_rate": 1e-07,
"loss": 2.7001,
"step": 14440
},
{
"epoch": 9.53,
"learning_rate": 1e-07,
"loss": 2.699,
"step": 14478
},
{
"epoch": 9.55,
"learning_rate": 1e-07,
"loss": 2.612,
"step": 14516
},
{
"epoch": 9.57,
"learning_rate": 1e-07,
"loss": 2.6412,
"step": 14554
},
{
"epoch": 9.6,
"learning_rate": 1e-07,
"loss": 2.634,
"step": 14592
},
{
"epoch": 9.62,
"learning_rate": 1e-07,
"loss": 2.6596,
"step": 14630
},
{
"epoch": 9.65,
"learning_rate": 1e-07,
"loss": 2.702,
"step": 14668
},
{
"epoch": 9.68,
"learning_rate": 1e-07,
"loss": 2.692,
"step": 14706
},
{
"epoch": 9.7,
"learning_rate": 1e-07,
"loss": 2.6904,
"step": 14744
},
{
"epoch": 9.72,
"learning_rate": 1e-07,
"loss": 2.649,
"step": 14782
},
{
"epoch": 9.75,
"learning_rate": 1e-07,
"loss": 2.7208,
"step": 14820
},
{
"epoch": 9.78,
"learning_rate": 1e-07,
"loss": 2.6421,
"step": 14858
},
{
"epoch": 9.8,
"learning_rate": 1e-07,
"loss": 2.6062,
"step": 14896
},
{
"epoch": 9.82,
"learning_rate": 1e-07,
"loss": 2.6326,
"step": 14934
},
{
"epoch": 9.85,
"learning_rate": 1e-07,
"loss": 2.6574,
"step": 14972
},
{
"epoch": 9.88,
"learning_rate": 1e-07,
"loss": 2.6527,
"step": 15010
},
{
"epoch": 9.9,
"learning_rate": 1e-07,
"loss": 2.6796,
"step": 15048
},
{
"epoch": 9.93,
"learning_rate": 1e-07,
"loss": 2.543,
"step": 15086
},
{
"epoch": 9.95,
"learning_rate": 1e-07,
"loss": 2.6001,
"step": 15124
},
{
"epoch": 9.97,
"learning_rate": 1e-07,
"loss": 2.6147,
"step": 15162
},
{
"epoch": 10.0,
"learning_rate": 1e-07,
"loss": 2.6627,
"step": 15200
},
{
"epoch": 10.03,
"learning_rate": 1e-07,
"loss": 2.6809,
"step": 15238
},
{
"epoch": 10.05,
"learning_rate": 1e-07,
"loss": 2.6684,
"step": 15276
},
{
"epoch": 10.07,
"learning_rate": 1e-07,
"loss": 2.6266,
"step": 15314
},
{
"epoch": 10.1,
"learning_rate": 1e-07,
"loss": 2.6882,
"step": 15352
},
{
"epoch": 10.12,
"learning_rate": 1e-07,
"loss": 2.6337,
"step": 15390
},
{
"epoch": 10.15,
"learning_rate": 1e-07,
"loss": 2.6511,
"step": 15428
},
{
"epoch": 10.18,
"learning_rate": 1e-07,
"loss": 2.5565,
"step": 15466
},
{
"epoch": 10.2,
"learning_rate": 1e-07,
"loss": 2.6532,
"step": 15504
},
{
"epoch": 10.22,
"learning_rate": 1e-07,
"loss": 2.6808,
"step": 15542
},
{
"epoch": 10.25,
"learning_rate": 1e-07,
"loss": 2.6367,
"step": 15580
},
{
"epoch": 10.28,
"learning_rate": 1e-07,
"loss": 2.6816,
"step": 15618
},
{
"epoch": 10.3,
"learning_rate": 1e-07,
"loss": 2.5894,
"step": 15656
},
{
"epoch": 10.32,
"learning_rate": 1e-07,
"loss": 2.6045,
"step": 15694
},
{
"epoch": 10.35,
"learning_rate": 1e-07,
"loss": 2.6664,
"step": 15732
},
{
"epoch": 10.38,
"learning_rate": 1e-07,
"loss": 2.6359,
"step": 15770
},
{
"epoch": 10.4,
"learning_rate": 1e-07,
"loss": 2.595,
"step": 15808
},
{
"epoch": 10.43,
"learning_rate": 1e-07,
"loss": 2.7218,
"step": 15846
},
{
"epoch": 10.45,
"learning_rate": 1e-07,
"loss": 2.6015,
"step": 15884
},
{
"epoch": 10.47,
"learning_rate": 1e-07,
"loss": 2.6469,
"step": 15922
},
{
"epoch": 10.5,
"learning_rate": 1e-07,
"loss": 2.6632,
"step": 15960
},
{
"epoch": 10.53,
"learning_rate": 1e-07,
"loss": 2.6137,
"step": 15998
},
{
"epoch": 10.55,
"learning_rate": 1e-07,
"loss": 2.5723,
"step": 16036
},
{
"epoch": 10.57,
"learning_rate": 1e-07,
"loss": 2.5788,
"step": 16074
},
{
"epoch": 10.6,
"learning_rate": 1e-07,
"loss": 2.6213,
"step": 16112
},
{
"epoch": 10.62,
"learning_rate": 1e-07,
"loss": 2.6261,
"step": 16150
},
{
"epoch": 10.65,
"learning_rate": 1e-07,
"loss": 2.5937,
"step": 16188
},
{
"epoch": 10.68,
"learning_rate": 1e-07,
"loss": 2.5266,
"step": 16226
},
{
"epoch": 10.7,
"learning_rate": 1e-07,
"loss": 2.6844,
"step": 16264
},
{
"epoch": 10.72,
"learning_rate": 1e-07,
"loss": 2.5672,
"step": 16302
},
{
"epoch": 10.75,
"learning_rate": 1e-07,
"loss": 2.5905,
"step": 16340
},
{
"epoch": 10.78,
"learning_rate": 1e-07,
"loss": 2.5908,
"step": 16378
},
{
"epoch": 10.8,
"learning_rate": 1e-07,
"loss": 2.7097,
"step": 16416
},
{
"epoch": 10.82,
"learning_rate": 1e-07,
"loss": 2.6675,
"step": 16454
},
{
"epoch": 10.85,
"learning_rate": 1e-07,
"loss": 2.5913,
"step": 16492
},
{
"epoch": 10.88,
"learning_rate": 1e-07,
"loss": 2.6108,
"step": 16530
},
{
"epoch": 10.9,
"learning_rate": 1e-07,
"loss": 2.5413,
"step": 16568
},
{
"epoch": 10.93,
"learning_rate": 1e-07,
"loss": 2.6089,
"step": 16606
},
{
"epoch": 10.95,
"learning_rate": 1e-07,
"loss": 2.5889,
"step": 16644
},
{
"epoch": 10.97,
"learning_rate": 1e-07,
"loss": 2.5092,
"step": 16682
},
{
"epoch": 11.0,
"learning_rate": 1e-07,
"loss": 2.6164,
"step": 16720
},
{
"epoch": 11.03,
"learning_rate": 1e-07,
"loss": 2.6324,
"step": 16758
},
{
"epoch": 11.05,
"learning_rate": 1e-07,
"loss": 2.6398,
"step": 16796
},
{
"epoch": 11.07,
"learning_rate": 1e-07,
"loss": 2.6602,
"step": 16834
},
{
"epoch": 11.1,
"learning_rate": 1e-07,
"loss": 2.6362,
"step": 16872
},
{
"epoch": 11.12,
"learning_rate": 1e-07,
"loss": 2.574,
"step": 16910
},
{
"epoch": 11.15,
"learning_rate": 1e-07,
"loss": 2.5672,
"step": 16948
},
{
"epoch": 11.18,
"learning_rate": 1e-07,
"loss": 2.5726,
"step": 16986
},
{
"epoch": 11.2,
"learning_rate": 1e-07,
"loss": 2.6055,
"step": 17024
},
{
"epoch": 11.22,
"learning_rate": 1e-07,
"loss": 2.5427,
"step": 17062
},
{
"epoch": 11.25,
"learning_rate": 1e-07,
"loss": 2.5632,
"step": 17100
},
{
"epoch": 11.28,
"learning_rate": 1e-07,
"loss": 2.6167,
"step": 17138
},
{
"epoch": 11.3,
"learning_rate": 1e-07,
"loss": 2.584,
"step": 17176
},
{
"epoch": 11.32,
"learning_rate": 1e-07,
"loss": 2.6179,
"step": 17214
},
{
"epoch": 11.35,
"learning_rate": 1e-07,
"loss": 2.6232,
"step": 17252
},
{
"epoch": 11.38,
"learning_rate": 1e-07,
"loss": 2.6158,
"step": 17290
},
{
"epoch": 11.4,
"learning_rate": 1e-07,
"loss": 2.6115,
"step": 17328
},
{
"epoch": 11.43,
"learning_rate": 1e-07,
"loss": 2.5079,
"step": 17366
},
{
"epoch": 11.45,
"learning_rate": 1e-07,
"loss": 2.6525,
"step": 17404
},
{
"epoch": 11.47,
"learning_rate": 1e-07,
"loss": 2.5643,
"step": 17442
},
{
"epoch": 11.5,
"learning_rate": 1e-07,
"loss": 2.6215,
"step": 17480
},
{
"epoch": 11.53,
"learning_rate": 1e-07,
"loss": 2.5774,
"step": 17518
},
{
"epoch": 11.55,
"learning_rate": 1e-07,
"loss": 2.5696,
"step": 17556
},
{
"epoch": 11.57,
"learning_rate": 1e-07,
"loss": 2.5622,
"step": 17594
},
{
"epoch": 11.6,
"learning_rate": 1e-07,
"loss": 2.5572,
"step": 17632
},
{
"epoch": 11.62,
"learning_rate": 1e-07,
"loss": 2.5853,
"step": 17670
},
{
"epoch": 11.65,
"learning_rate": 1e-07,
"loss": 2.6487,
"step": 17708
},
{
"epoch": 11.68,
"learning_rate": 1e-07,
"loss": 2.5708,
"step": 17746
},
{
"epoch": 11.7,
"learning_rate": 1e-07,
"loss": 2.5757,
"step": 17784
},
{
"epoch": 11.72,
"learning_rate": 1e-07,
"loss": 2.5935,
"step": 17822
},
{
"epoch": 11.75,
"learning_rate": 1e-07,
"loss": 2.5212,
"step": 17860
},
{
"epoch": 11.78,
"learning_rate": 1e-07,
"loss": 2.6067,
"step": 17898
},
{
"epoch": 11.8,
"learning_rate": 1e-07,
"loss": 2.5712,
"step": 17936
},
{
"epoch": 11.82,
"learning_rate": 1e-07,
"loss": 2.5646,
"step": 17974
},
{
"epoch": 11.85,
"learning_rate": 1e-07,
"loss": 2.5789,
"step": 18012
},
{
"epoch": 11.88,
"learning_rate": 1e-07,
"loss": 2.6135,
"step": 18050
},
{
"epoch": 11.9,
"learning_rate": 1e-07,
"loss": 2.5586,
"step": 18088
},
{
"epoch": 11.93,
"learning_rate": 1e-07,
"loss": 2.5409,
"step": 18126
},
{
"epoch": 11.95,
"learning_rate": 1e-07,
"loss": 2.5607,
"step": 18164
},
{
"epoch": 11.97,
"learning_rate": 1e-07,
"loss": 2.5547,
"step": 18202
},
{
"epoch": 12.0,
"learning_rate": 1e-07,
"loss": 2.5457,
"step": 18240
},
{
"epoch": 12.03,
"learning_rate": 1e-07,
"loss": 2.5895,
"step": 18278
},
{
"epoch": 12.05,
"learning_rate": 1e-07,
"loss": 2.61,
"step": 18316
},
{
"epoch": 12.07,
"learning_rate": 1e-07,
"loss": 2.6503,
"step": 18354
},
{
"epoch": 12.1,
"learning_rate": 1e-07,
"loss": 2.5908,
"step": 18392
},
{
"epoch": 12.12,
"learning_rate": 1e-07,
"loss": 2.5728,
"step": 18430
},
{
"epoch": 12.15,
"learning_rate": 1e-07,
"loss": 2.5264,
"step": 18468
},
{
"epoch": 12.18,
"learning_rate": 1e-07,
"loss": 2.52,
"step": 18506
},
{
"epoch": 12.2,
"learning_rate": 1e-07,
"loss": 2.6832,
"step": 18544
},
{
"epoch": 12.22,
"learning_rate": 1e-07,
"loss": 2.6024,
"step": 18582
},
{
"epoch": 12.25,
"learning_rate": 1e-07,
"loss": 2.4557,
"step": 18620
},
{
"epoch": 12.28,
"learning_rate": 1e-07,
"loss": 2.6531,
"step": 18658
},
{
"epoch": 12.3,
"learning_rate": 1e-07,
"loss": 2.5052,
"step": 18696
},
{
"epoch": 12.32,
"learning_rate": 1e-07,
"loss": 2.4904,
"step": 18734
},
{
"epoch": 12.35,
"learning_rate": 1e-07,
"loss": 2.5336,
"step": 18772
},
{
"epoch": 12.38,
"learning_rate": 1e-07,
"loss": 2.5244,
"step": 18810
},
{
"epoch": 12.4,
"learning_rate": 1e-07,
"loss": 2.5641,
"step": 18848
},
{
"epoch": 12.43,
"learning_rate": 1e-07,
"loss": 2.5388,
"step": 18886
},
{
"epoch": 12.45,
"learning_rate": 1e-07,
"loss": 2.5346,
"step": 18924
},
{
"epoch": 12.47,
"learning_rate": 1e-07,
"loss": 2.5518,
"step": 18962
},
{
"epoch": 12.5,
"learning_rate": 1e-07,
"loss": 2.5202,
"step": 19000
},
{
"epoch": 12.53,
"learning_rate": 1e-07,
"loss": 2.5293,
"step": 19038
},
{
"epoch": 12.55,
"learning_rate": 1e-07,
"loss": 2.567,
"step": 19076
},
{
"epoch": 12.57,
"learning_rate": 1e-07,
"loss": 2.5357,
"step": 19114
},
{
"epoch": 12.6,
"learning_rate": 1e-07,
"loss": 2.5785,
"step": 19152
},
{
"epoch": 12.62,
"learning_rate": 1e-07,
"loss": 2.6327,
"step": 19190
},
{
"epoch": 12.65,
"learning_rate": 1e-07,
"loss": 2.6912,
"step": 19228
},
{
"epoch": 12.68,
"learning_rate": 1e-07,
"loss": 2.4824,
"step": 19266
},
{
"epoch": 12.7,
"learning_rate": 1e-07,
"loss": 2.5996,
"step": 19304
},
{
"epoch": 12.72,
"learning_rate": 1e-07,
"loss": 2.5246,
"step": 19342
},
{
"epoch": 12.75,
"learning_rate": 1e-07,
"loss": 2.5583,
"step": 19380
},
{
"epoch": 12.78,
"learning_rate": 1e-07,
"loss": 2.515,
"step": 19418
},
{
"epoch": 12.8,
"learning_rate": 1e-07,
"loss": 2.5677,
"step": 19456
},
{
"epoch": 12.82,
"learning_rate": 1e-07,
"loss": 2.5488,
"step": 19494
},
{
"epoch": 12.85,
"learning_rate": 1e-07,
"loss": 2.5562,
"step": 19532
},
{
"epoch": 12.88,
"learning_rate": 1e-07,
"loss": 2.544,
"step": 19570
},
{
"epoch": 12.9,
"learning_rate": 1e-07,
"loss": 2.5297,
"step": 19608
},
{
"epoch": 12.93,
"learning_rate": 1e-07,
"loss": 2.5091,
"step": 19646
},
{
"epoch": 12.95,
"learning_rate": 1e-07,
"loss": 2.5492,
"step": 19684
},
{
"epoch": 12.97,
"learning_rate": 1e-07,
"loss": 2.4553,
"step": 19722
},
{
"epoch": 13.0,
"learning_rate": 1e-07,
"loss": 2.5344,
"step": 19760
},
{
"epoch": 13.03,
"learning_rate": 1e-07,
"loss": 2.4807,
"step": 19798
},
{
"epoch": 13.05,
"learning_rate": 1e-07,
"loss": 2.4998,
"step": 19836
},
{
"epoch": 13.07,
"learning_rate": 1e-07,
"loss": 2.5033,
"step": 19874
},
{
"epoch": 13.1,
"learning_rate": 1e-07,
"loss": 2.5731,
"step": 19912
},
{
"epoch": 13.12,
"learning_rate": 1e-07,
"loss": 2.5362,
"step": 19950
},
{
"epoch": 13.15,
"learning_rate": 1e-07,
"loss": 2.5897,
"step": 19988
},
{
"epoch": 13.18,
"learning_rate": 1e-07,
"loss": 2.5419,
"step": 20026
},
{
"epoch": 13.2,
"learning_rate": 1e-07,
"loss": 2.5616,
"step": 20064
},
{
"epoch": 13.22,
"learning_rate": 1e-07,
"loss": 2.4845,
"step": 20102
},
{
"epoch": 13.25,
"learning_rate": 1e-07,
"loss": 2.5642,
"step": 20140
},
{
"epoch": 13.28,
"learning_rate": 1e-07,
"loss": 2.4496,
"step": 20178
},
{
"epoch": 13.3,
"learning_rate": 1e-07,
"loss": 2.5136,
"step": 20216
},
{
"epoch": 13.32,
"learning_rate": 1e-07,
"loss": 2.5947,
"step": 20254
},
{
"epoch": 13.35,
"learning_rate": 1e-07,
"loss": 2.5272,
"step": 20292
},
{
"epoch": 13.38,
"learning_rate": 1e-07,
"loss": 2.5801,
"step": 20330
},
{
"epoch": 13.4,
"learning_rate": 1e-07,
"loss": 2.4692,
"step": 20368
},
{
"epoch": 13.43,
"learning_rate": 1e-07,
"loss": 2.4849,
"step": 20406
},
{
"epoch": 13.45,
"learning_rate": 1e-07,
"loss": 2.5942,
"step": 20444
},
{
"epoch": 13.47,
"learning_rate": 1e-07,
"loss": 2.5341,
"step": 20482
},
{
"epoch": 13.5,
"learning_rate": 1e-07,
"loss": 2.4762,
"step": 20520
},
{
"epoch": 13.53,
"learning_rate": 1e-07,
"loss": 2.5744,
"step": 20558
},
{
"epoch": 13.55,
"learning_rate": 1e-07,
"loss": 2.5383,
"step": 20596
},
{
"epoch": 13.57,
"learning_rate": 1e-07,
"loss": 2.5769,
"step": 20634
},
{
"epoch": 13.6,
"learning_rate": 1e-07,
"loss": 2.5196,
"step": 20672
},
{
"epoch": 13.62,
"learning_rate": 1e-07,
"loss": 2.6015,
"step": 20710
},
{
"epoch": 13.65,
"learning_rate": 1e-07,
"loss": 2.5733,
"step": 20748
},
{
"epoch": 13.68,
"learning_rate": 1e-07,
"loss": 2.4808,
"step": 20786
},
{
"epoch": 13.7,
"learning_rate": 1e-07,
"loss": 2.4772,
"step": 20824
},
{
"epoch": 13.72,
"learning_rate": 1e-07,
"loss": 2.5212,
"step": 20862
},
{
"epoch": 13.75,
"learning_rate": 1e-07,
"loss": 2.5066,
"step": 20900
},
{
"epoch": 13.78,
"learning_rate": 1e-07,
"loss": 2.542,
"step": 20938
},
{
"epoch": 13.8,
"learning_rate": 1e-07,
"loss": 2.5422,
"step": 20976
},
{
"epoch": 13.82,
"learning_rate": 1e-07,
"loss": 2.5284,
"step": 21014
},
{
"epoch": 13.85,
"learning_rate": 1e-07,
"loss": 2.5472,
"step": 21052
},
{
"epoch": 13.88,
"learning_rate": 1e-07,
"loss": 2.6212,
"step": 21090
},
{
"epoch": 13.9,
"learning_rate": 1e-07,
"loss": 2.4717,
"step": 21128
},
{
"epoch": 13.93,
"learning_rate": 1e-07,
"loss": 2.5001,
"step": 21166
},
{
"epoch": 13.95,
"learning_rate": 1e-07,
"loss": 2.4892,
"step": 21204
},
{
"epoch": 13.97,
"learning_rate": 1e-07,
"loss": 2.4833,
"step": 21242
},
{
"epoch": 14.0,
"learning_rate": 1e-07,
"loss": 2.5363,
"step": 21280
},
{
"epoch": 14.03,
"learning_rate": 1e-07,
"loss": 2.5563,
"step": 21318
},
{
"epoch": 14.05,
"learning_rate": 1e-07,
"loss": 2.4392,
"step": 21356
},
{
"epoch": 14.07,
"learning_rate": 1e-07,
"loss": 2.4836,
"step": 21394
},
{
"epoch": 14.1,
"learning_rate": 1e-07,
"loss": 2.5705,
"step": 21432
},
{
"epoch": 14.12,
"learning_rate": 1e-07,
"loss": 2.494,
"step": 21470
},
{
"epoch": 14.15,
"learning_rate": 1e-07,
"loss": 2.5226,
"step": 21508
},
{
"epoch": 14.18,
"learning_rate": 1e-07,
"loss": 2.5026,
"step": 21546
},
{
"epoch": 14.2,
"learning_rate": 1e-07,
"loss": 2.4934,
"step": 21584
},
{
"epoch": 14.22,
"learning_rate": 1e-07,
"loss": 2.5057,
"step": 21622
},
{
"epoch": 14.25,
"learning_rate": 1e-07,
"loss": 2.4886,
"step": 21660
},
{
"epoch": 14.28,
"learning_rate": 1e-07,
"loss": 2.4816,
"step": 21698
},
{
"epoch": 14.3,
"learning_rate": 1e-07,
"loss": 2.4714,
"step": 21736
},
{
"epoch": 14.32,
"learning_rate": 1e-07,
"loss": 2.5459,
"step": 21774
},
{
"epoch": 14.35,
"learning_rate": 1e-07,
"loss": 2.549,
"step": 21812
},
{
"epoch": 14.38,
"learning_rate": 1e-07,
"loss": 2.4623,
"step": 21850
},
{
"epoch": 14.4,
"learning_rate": 1e-07,
"loss": 2.48,
"step": 21888
},
{
"epoch": 14.43,
"learning_rate": 1e-07,
"loss": 2.5529,
"step": 21926
},
{
"epoch": 14.45,
"learning_rate": 1e-07,
"loss": 2.5045,
"step": 21964
},
{
"epoch": 14.47,
"learning_rate": 1e-07,
"loss": 2.5599,
"step": 22002
},
{
"epoch": 14.5,
"learning_rate": 1e-07,
"loss": 2.4862,
"step": 22040
},
{
"epoch": 14.53,
"learning_rate": 1e-07,
"loss": 2.5145,
"step": 22078
},
{
"epoch": 14.55,
"learning_rate": 1e-07,
"loss": 2.4944,
"step": 22116
},
{
"epoch": 14.57,
"learning_rate": 1e-07,
"loss": 2.4581,
"step": 22154
},
{
"epoch": 14.6,
"learning_rate": 1e-07,
"loss": 2.552,
"step": 22192
},
{
"epoch": 14.62,
"learning_rate": 1e-07,
"loss": 2.5751,
"step": 22230
},
{
"epoch": 14.65,
"learning_rate": 1e-07,
"loss": 2.5383,
"step": 22268
},
{
"epoch": 14.68,
"learning_rate": 1e-07,
"loss": 2.5273,
"step": 22306
},
{
"epoch": 14.7,
"learning_rate": 1e-07,
"loss": 2.5118,
"step": 22344
},
{
"epoch": 14.72,
"learning_rate": 1e-07,
"loss": 2.5706,
"step": 22382
},
{
"epoch": 14.75,
"learning_rate": 1e-07,
"loss": 2.4765,
"step": 22420
},
{
"epoch": 14.78,
"learning_rate": 1e-07,
"loss": 2.4875,
"step": 22458
},
{
"epoch": 14.8,
"learning_rate": 1e-07,
"loss": 2.5111,
"step": 22496
},
{
"epoch": 14.82,
"learning_rate": 1e-07,
"loss": 2.4927,
"step": 22534
},
{
"epoch": 14.85,
"learning_rate": 1e-07,
"loss": 2.4156,
"step": 22572
},
{
"epoch": 14.88,
"learning_rate": 1e-07,
"loss": 2.5199,
"step": 22610
},
{
"epoch": 14.9,
"learning_rate": 1e-07,
"loss": 2.4974,
"step": 22648
},
{
"epoch": 14.93,
"learning_rate": 1e-07,
"loss": 2.5177,
"step": 22686
},
{
"epoch": 14.95,
"learning_rate": 1e-07,
"loss": 2.4438,
"step": 22724
},
{
"epoch": 14.97,
"learning_rate": 1e-07,
"loss": 2.4829,
"step": 22762
},
{
"epoch": 15.0,
"learning_rate": 1e-07,
"loss": 2.544,
"step": 22800
},
{
"epoch": 15.03,
"learning_rate": 1e-07,
"loss": 2.5036,
"step": 22838
},
{
"epoch": 15.05,
"learning_rate": 1e-07,
"loss": 2.4167,
"step": 22876
},
{
"epoch": 15.07,
"learning_rate": 1e-07,
"loss": 2.5017,
"step": 22914
},
{
"epoch": 15.1,
"learning_rate": 1e-07,
"loss": 2.4957,
"step": 22952
},
{
"epoch": 15.12,
"learning_rate": 1e-07,
"loss": 2.4176,
"step": 22990
},
{
"epoch": 15.15,
"learning_rate": 1e-07,
"loss": 2.535,
"step": 23028
},
{
"epoch": 15.18,
"learning_rate": 1e-07,
"loss": 2.5306,
"step": 23066
},
{
"epoch": 15.2,
"learning_rate": 1e-07,
"loss": 2.4867,
"step": 23104
},
{
"epoch": 15.22,
"learning_rate": 1e-07,
"loss": 2.5182,
"step": 23142
},
{
"epoch": 15.25,
"learning_rate": 1e-07,
"loss": 2.4443,
"step": 23180
},
{
"epoch": 15.28,
"learning_rate": 1e-07,
"loss": 2.4416,
"step": 23218
},
{
"epoch": 15.3,
"learning_rate": 1e-07,
"loss": 2.4665,
"step": 23256
},
{
"epoch": 15.32,
"learning_rate": 1e-07,
"loss": 2.4292,
"step": 23294
},
{
"epoch": 15.35,
"learning_rate": 1e-07,
"loss": 2.5607,
"step": 23332
},
{
"epoch": 15.38,
"learning_rate": 1e-07,
"loss": 2.4473,
"step": 23370
},
{
"epoch": 15.4,
"learning_rate": 1e-07,
"loss": 2.522,
"step": 23408
},
{
"epoch": 15.43,
"learning_rate": 1e-07,
"loss": 2.5633,
"step": 23446
},
{
"epoch": 15.45,
"learning_rate": 1e-07,
"loss": 2.4707,
"step": 23484
},
{
"epoch": 15.47,
"learning_rate": 1e-07,
"loss": 2.5822,
"step": 23522
},
{
"epoch": 15.5,
"learning_rate": 1e-07,
"loss": 2.4354,
"step": 23560
},
{
"epoch": 15.53,
"learning_rate": 1e-07,
"loss": 2.337,
"step": 23598
},
{
"epoch": 15.55,
"learning_rate": 1e-07,
"loss": 2.4321,
"step": 23636
},
{
"epoch": 15.57,
"learning_rate": 1e-07,
"loss": 2.453,
"step": 23674
},
{
"epoch": 15.6,
"learning_rate": 1e-07,
"loss": 2.4727,
"step": 23712
},
{
"epoch": 15.62,
"learning_rate": 1e-07,
"loss": 2.5775,
"step": 23750
},
{
"epoch": 15.65,
"learning_rate": 1e-07,
"loss": 2.4485,
"step": 23788
},
{
"epoch": 15.68,
"learning_rate": 1e-07,
"loss": 2.4905,
"step": 23826
},
{
"epoch": 15.7,
"learning_rate": 1e-07,
"loss": 2.486,
"step": 23864
},
{
"epoch": 15.72,
"learning_rate": 1e-07,
"loss": 2.4907,
"step": 23902
},
{
"epoch": 15.75,
"learning_rate": 1e-07,
"loss": 2.4584,
"step": 23940
},
{
"epoch": 15.78,
"learning_rate": 1e-07,
"loss": 2.5427,
"step": 23978
},
{
"epoch": 15.8,
"learning_rate": 1e-07,
"loss": 2.5468,
"step": 24016
},
{
"epoch": 15.82,
"learning_rate": 1e-07,
"loss": 2.5179,
"step": 24054
},
{
"epoch": 15.85,
"learning_rate": 1e-07,
"loss": 2.4527,
"step": 24092
},
{
"epoch": 15.88,
"learning_rate": 1e-07,
"loss": 2.4912,
"step": 24130
},
{
"epoch": 15.9,
"learning_rate": 1e-07,
"loss": 2.5246,
"step": 24168
},
{
"epoch": 15.93,
"learning_rate": 1e-07,
"loss": 2.4518,
"step": 24206
},
{
"epoch": 15.95,
"learning_rate": 1e-07,
"loss": 2.4702,
"step": 24244
},
{
"epoch": 15.97,
"learning_rate": 1e-07,
"loss": 2.4807,
"step": 24282
},
{
"epoch": 16.0,
"learning_rate": 1e-07,
"loss": 2.5016,
"step": 24320
},
{
"epoch": 16.02,
"learning_rate": 1e-07,
"loss": 2.4547,
"step": 24358
},
{
"epoch": 16.05,
"learning_rate": 1e-07,
"loss": 2.5046,
"step": 24396
},
{
"epoch": 16.07,
"learning_rate": 1e-07,
"loss": 2.4804,
"step": 24434
},
{
"epoch": 16.1,
"learning_rate": 1e-07,
"loss": 2.439,
"step": 24472
},
{
"epoch": 16.12,
"learning_rate": 1e-07,
"loss": 2.4343,
"step": 24510
},
{
"epoch": 16.15,
"learning_rate": 1e-07,
"loss": 2.5387,
"step": 24548
},
{
"epoch": 16.18,
"learning_rate": 1e-07,
"loss": 2.4459,
"step": 24586
},
{
"epoch": 16.2,
"learning_rate": 1e-07,
"loss": 2.4423,
"step": 24624
},
{
"epoch": 16.23,
"learning_rate": 1e-07,
"loss": 2.5521,
"step": 24662
},
{
"epoch": 16.25,
"learning_rate": 1e-07,
"loss": 2.5029,
"step": 24700
},
{
"epoch": 16.27,
"learning_rate": 1e-07,
"loss": 2.5005,
"step": 24738
},
{
"epoch": 16.3,
"learning_rate": 1e-07,
"loss": 2.4821,
"step": 24776
},
{
"epoch": 16.32,
"learning_rate": 1e-07,
"loss": 2.4868,
"step": 24814
},
{
"epoch": 16.35,
"learning_rate": 1e-07,
"loss": 2.4993,
"step": 24852
},
{
"epoch": 16.38,
"learning_rate": 1e-07,
"loss": 2.4132,
"step": 24890
},
{
"epoch": 16.4,
"learning_rate": 1e-07,
"loss": 2.4347,
"step": 24928
},
{
"epoch": 16.43,
"learning_rate": 1e-07,
"loss": 2.4667,
"step": 24966
},
{
"epoch": 16.45,
"learning_rate": 1e-07,
"loss": 2.4252,
"step": 25004
},
{
"epoch": 16.48,
"learning_rate": 1e-07,
"loss": 2.4267,
"step": 25042
},
{
"epoch": 16.5,
"learning_rate": 1e-07,
"loss": 2.4384,
"step": 25080
},
{
"epoch": 16.52,
"learning_rate": 1e-07,
"loss": 2.5273,
"step": 25118
},
{
"epoch": 16.55,
"learning_rate": 1e-07,
"loss": 2.5036,
"step": 25156
},
{
"epoch": 16.57,
"learning_rate": 1e-07,
"loss": 2.5159,
"step": 25194
},
{
"epoch": 16.6,
"learning_rate": 1e-07,
"loss": 2.4323,
"step": 25232
},
{
"epoch": 16.62,
"learning_rate": 1e-07,
"loss": 2.3885,
"step": 25270
},
{
"epoch": 16.65,
"learning_rate": 1e-07,
"loss": 2.4609,
"step": 25308
},
{
"epoch": 16.68,
"learning_rate": 1e-07,
"loss": 2.4227,
"step": 25346
},
{
"epoch": 16.7,
"learning_rate": 1e-07,
"loss": 2.5008,
"step": 25384
},
{
"epoch": 16.73,
"learning_rate": 1e-07,
"loss": 2.4119,
"step": 25422
},
{
"epoch": 16.75,
"learning_rate": 1e-07,
"loss": 2.4854,
"step": 25460
},
{
"epoch": 16.77,
"learning_rate": 1e-07,
"loss": 2.4073,
"step": 25498
},
{
"epoch": 16.8,
"learning_rate": 1e-07,
"loss": 2.4732,
"step": 25536
},
{
"epoch": 16.82,
"learning_rate": 1e-07,
"loss": 2.5109,
"step": 25574
},
{
"epoch": 16.85,
"learning_rate": 1e-07,
"loss": 2.4875,
"step": 25612
},
{
"epoch": 16.88,
"learning_rate": 1e-07,
"loss": 2.4114,
"step": 25650
},
{
"epoch": 16.9,
"learning_rate": 1e-07,
"loss": 2.5228,
"step": 25688
},
{
"epoch": 16.93,
"learning_rate": 1e-07,
"loss": 2.467,
"step": 25726
},
{
"epoch": 16.95,
"learning_rate": 1e-07,
"loss": 2.4497,
"step": 25764
},
{
"epoch": 16.98,
"learning_rate": 1e-07,
"loss": 2.4725,
"step": 25802
},
{
"epoch": 17.0,
"learning_rate": 1e-07,
"loss": 2.4368,
"step": 25840
},
{
"epoch": 17.02,
"learning_rate": 1e-07,
"loss": 2.4846,
"step": 25878
},
{
"epoch": 17.05,
"learning_rate": 1e-07,
"loss": 2.3977,
"step": 25916
},
{
"epoch": 17.07,
"learning_rate": 1e-07,
"loss": 2.4389,
"step": 25954
},
{
"epoch": 17.1,
"learning_rate": 1e-07,
"loss": 2.4865,
"step": 25992
},
{
"epoch": 17.12,
"learning_rate": 1e-07,
"loss": 2.3666,
"step": 26030
},
{
"epoch": 17.15,
"learning_rate": 1e-07,
"loss": 2.5045,
"step": 26068
},
{
"epoch": 17.18,
"learning_rate": 1e-07,
"loss": 2.4607,
"step": 26106
},
{
"epoch": 17.2,
"learning_rate": 1e-07,
"loss": 2.5089,
"step": 26144
},
{
"epoch": 17.23,
"learning_rate": 1e-07,
"loss": 2.4729,
"step": 26182
},
{
"epoch": 17.25,
"learning_rate": 1e-07,
"loss": 2.4228,
"step": 26220
},
{
"epoch": 17.27,
"learning_rate": 1e-07,
"loss": 2.4237,
"step": 26258
},
{
"epoch": 17.3,
"learning_rate": 1e-07,
"loss": 2.4593,
"step": 26296
},
{
"epoch": 17.32,
"learning_rate": 1e-07,
"loss": 2.4044,
"step": 26334
},
{
"epoch": 17.35,
"learning_rate": 1e-07,
"loss": 2.5488,
"step": 26372
},
{
"epoch": 17.38,
"learning_rate": 1e-07,
"loss": 2.3409,
"step": 26410
},
{
"epoch": 17.4,
"learning_rate": 1e-07,
"loss": 2.3963,
"step": 26448
},
{
"epoch": 17.43,
"learning_rate": 1e-07,
"loss": 2.4953,
"step": 26486
},
{
"epoch": 17.45,
"learning_rate": 1e-07,
"loss": 2.4604,
"step": 26524
},
{
"epoch": 17.48,
"learning_rate": 1e-07,
"loss": 2.4211,
"step": 26562
},
{
"epoch": 17.5,
"learning_rate": 1e-07,
"loss": 2.4791,
"step": 26600
},
{
"epoch": 17.52,
"learning_rate": 1e-07,
"loss": 2.442,
"step": 26638
},
{
"epoch": 17.55,
"learning_rate": 1e-07,
"loss": 2.4315,
"step": 26676
},
{
"epoch": 17.57,
"learning_rate": 1e-07,
"loss": 2.4432,
"step": 26714
},
{
"epoch": 17.6,
"learning_rate": 1e-07,
"loss": 2.4599,
"step": 26752
},
{
"epoch": 17.62,
"learning_rate": 1e-07,
"loss": 2.5794,
"step": 26790
},
{
"epoch": 17.65,
"learning_rate": 1e-07,
"loss": 2.4263,
"step": 26828
},
{
"epoch": 17.68,
"learning_rate": 1e-07,
"loss": 2.4788,
"step": 26866
},
{
"epoch": 17.7,
"learning_rate": 1e-07,
"loss": 2.4702,
"step": 26904
},
{
"epoch": 17.73,
"learning_rate": 1e-07,
"loss": 2.4099,
"step": 26942
},
{
"epoch": 17.75,
"learning_rate": 1e-07,
"loss": 2.3706,
"step": 26980
},
{
"epoch": 17.77,
"learning_rate": 1e-07,
"loss": 2.3648,
"step": 27018
},
{
"epoch": 17.8,
"learning_rate": 1e-07,
"loss": 2.4577,
"step": 27056
},
{
"epoch": 17.82,
"learning_rate": 1e-07,
"loss": 2.454,
"step": 27094
},
{
"epoch": 17.85,
"learning_rate": 1e-07,
"loss": 2.4376,
"step": 27132
},
{
"epoch": 17.88,
"learning_rate": 1e-07,
"loss": 2.4718,
"step": 27170
},
{
"epoch": 17.9,
"learning_rate": 1e-07,
"loss": 2.4469,
"step": 27208
},
{
"epoch": 17.93,
"learning_rate": 1e-07,
"loss": 2.4622,
"step": 27246
},
{
"epoch": 17.95,
"learning_rate": 1e-07,
"loss": 2.4444,
"step": 27284
},
{
"epoch": 17.98,
"learning_rate": 1e-07,
"loss": 2.4595,
"step": 27322
},
{
"epoch": 18.0,
"learning_rate": 1e-07,
"loss": 2.4624,
"step": 27360
},
{
"epoch": 18.0,
"step": 27360,
"total_flos": 4.063318154359603e+17,
"train_loss": 3.0161131058520043,
"train_runtime": 38619.075,
"train_samples_per_second": 1.417,
"train_steps_per_second": 0.708
}
],
"max_steps": 27360,
"num_train_epochs": 18,
"total_flos": 4.063318154359603e+17,
"trial_name": null,
"trial_params": null
}
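
Not part of the original trainer_state.json: a minimal sketch of how one might read the log_history above and plot training loss against global step. The local file name "trainer_state.json" and the use of matplotlib are assumptions for illustration, not anything defined by this file.

# Minimal sketch (assumption: the JSON above is saved locally as
# "trainer_state.json" and matplotlib is installed).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Keep only the periodic logging entries that carry a "loss" value;
# the final summary entry (train_runtime, total_flos, ...) does not.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]

plt.plot(steps, losses)
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title(f"epochs: {state['epoch']}, max steps: {state['max_steps']}")
plt.tight_layout()
plt.savefig("loss_curve.png")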