SushantGautam commited on
Commit
3d4422d
1 Parent(s): 139e2d2

Training in progress, step 500

Browse files
all_results.json CHANGED
@@ -20,9 +20,9 @@
20
  "predict_samples": 1568,
21
  "predict_samples_per_second": 0.99,
22
  "predict_steps_per_second": 0.042,
23
- "train_loss": 1.6920310260382014,
24
- "train_runtime": 827.5296,
25
  "train_samples": 6271,
26
- "train_samples_per_second": 22.734,
27
- "train_steps_per_second": 0.95
28
  }
 
20
  "predict_samples": 1568,
21
  "predict_samples_per_second": 0.99,
22
  "predict_steps_per_second": 0.042,
23
+ "train_loss": 0.5571378069069549,
24
+ "train_runtime": 308.6292,
25
  "train_samples": 6271,
26
+ "train_samples_per_second": 60.957,
27
+ "train_steps_per_second": 2.547
28
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7baea5f6d4d4cb969638d4204f9ecb1ad6a1aea7a6402c373495d1eca0b71e85
3
  size 647678513
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d4287e1a75d90d08ad45df3bb8f31d1d2e3680120d1334780594dc984516ddb
3
  size 647678513
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7429bd0a6bae8c971623be70159e055d55cbbcd9b54c6fd225b3141300daf5ff
3
- size 2108685
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:783c7e86aedf79dd0aae603805c9591f397ca4475620b58f9a9b78151e2cfb7c
3
+ size 2108586
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 1.6920310260382014,
4
- "train_runtime": 827.5296,
5
  "train_samples": 6271,
6
- "train_samples_per_second": 22.734,
7
- "train_steps_per_second": 0.95
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.5571378069069549,
4
+ "train_runtime": 308.6292,
5
  "train_samples": 6271,
6
+ "train_samples_per_second": 60.957,
7
+ "train_steps_per_second": 2.547
8
  }
trainer_state.json CHANGED
@@ -3010,1727 +3010,1727 @@
3010
  {
3011
  "epoch": 1.91,
3012
  "learning_rate": 1.812977099236641e-05,
3013
- "loss": 1.4986,
3014
  "step": 501
3015
  },
3016
  {
3017
  "epoch": 1.92,
3018
  "learning_rate": 1.8066157760814252e-05,
3019
- "loss": 1.622,
3020
  "step": 502
3021
  },
3022
  {
3023
  "epoch": 1.92,
3024
  "learning_rate": 1.800254452926209e-05,
3025
- "loss": 1.5716,
3026
  "step": 503
3027
  },
3028
  {
3029
  "epoch": 1.92,
3030
  "learning_rate": 1.7938931297709923e-05,
3031
- "loss": 1.676,
3032
  "step": 504
3033
  },
3034
  {
3035
  "epoch": 1.93,
3036
  "learning_rate": 1.7875318066157763e-05,
3037
- "loss": 1.5049,
3038
  "step": 505
3039
  },
3040
  {
3041
  "epoch": 1.93,
3042
  "learning_rate": 1.78117048346056e-05,
3043
- "loss": 1.5857,
3044
  "step": 506
3045
  },
3046
  {
3047
  "epoch": 1.94,
3048
  "learning_rate": 1.7748091603053434e-05,
3049
- "loss": 1.5618,
3050
  "step": 507
3051
  },
3052
  {
3053
  "epoch": 1.94,
3054
  "learning_rate": 1.7684478371501274e-05,
3055
- "loss": 1.623,
3056
  "step": 508
3057
  },
3058
  {
3059
  "epoch": 1.94,
3060
  "learning_rate": 1.762086513994911e-05,
3061
- "loss": 1.4365,
3062
  "step": 509
3063
  },
3064
  {
3065
  "epoch": 1.95,
3066
  "learning_rate": 1.7557251908396945e-05,
3067
- "loss": 1.5403,
3068
  "step": 510
3069
  },
3070
  {
3071
  "epoch": 1.95,
3072
  "learning_rate": 1.7493638676844786e-05,
3073
- "loss": 1.5295,
3074
  "step": 511
3075
  },
3076
  {
3077
  "epoch": 1.95,
3078
  "learning_rate": 1.7430025445292623e-05,
3079
- "loss": 1.538,
3080
  "step": 512
3081
  },
3082
  {
3083
  "epoch": 1.96,
3084
  "learning_rate": 1.736641221374046e-05,
3085
- "loss": 1.5656,
3086
  "step": 513
3087
  },
3088
  {
3089
  "epoch": 1.96,
3090
  "learning_rate": 1.7302798982188297e-05,
3091
- "loss": 1.5472,
3092
  "step": 514
3093
  },
3094
  {
3095
  "epoch": 1.97,
3096
  "learning_rate": 1.7239185750636134e-05,
3097
- "loss": 1.5966,
3098
  "step": 515
3099
  },
3100
  {
3101
  "epoch": 1.97,
3102
  "learning_rate": 1.717557251908397e-05,
3103
- "loss": 1.4952,
3104
  "step": 516
3105
  },
3106
  {
3107
  "epoch": 1.97,
3108
  "learning_rate": 1.7111959287531808e-05,
3109
- "loss": 1.6362,
3110
  "step": 517
3111
  },
3112
  {
3113
  "epoch": 1.98,
3114
  "learning_rate": 1.7048346055979645e-05,
3115
- "loss": 1.5589,
3116
  "step": 518
3117
  },
3118
  {
3119
  "epoch": 1.98,
3120
  "learning_rate": 1.6984732824427482e-05,
3121
- "loss": 1.6856,
3122
  "step": 519
3123
  },
3124
  {
3125
  "epoch": 1.98,
3126
  "learning_rate": 1.692111959287532e-05,
3127
- "loss": 1.473,
3128
  "step": 520
3129
  },
3130
  {
3131
  "epoch": 1.99,
3132
  "learning_rate": 1.6857506361323156e-05,
3133
- "loss": 1.6013,
3134
  "step": 521
3135
  },
3136
  {
3137
  "epoch": 1.99,
3138
  "learning_rate": 1.6793893129770993e-05,
3139
- "loss": 1.5531,
3140
  "step": 522
3141
  },
3142
  {
3143
  "epoch": 2.0,
3144
  "learning_rate": 1.673027989821883e-05,
3145
- "loss": 1.5726,
3146
  "step": 523
3147
  },
3148
  {
3149
  "epoch": 2.0,
3150
  "learning_rate": 1.6666666666666667e-05,
3151
- "loss": 1.5935,
3152
  "step": 524
3153
  },
3154
  {
3155
  "epoch": 2.0,
3156
  "learning_rate": 1.6603053435114505e-05,
3157
- "loss": 1.4443,
3158
  "step": 525
3159
  },
3160
  {
3161
  "epoch": 2.01,
3162
  "learning_rate": 1.653944020356234e-05,
3163
- "loss": 1.479,
3164
  "step": 526
3165
  },
3166
  {
3167
  "epoch": 2.01,
3168
  "learning_rate": 1.647582697201018e-05,
3169
- "loss": 1.531,
3170
  "step": 527
3171
  },
3172
  {
3173
  "epoch": 2.02,
3174
  "learning_rate": 1.6412213740458016e-05,
3175
- "loss": 1.4709,
3176
  "step": 528
3177
  },
3178
  {
3179
  "epoch": 2.02,
3180
  "learning_rate": 1.6348600508905853e-05,
3181
- "loss": 1.4644,
3182
  "step": 529
3183
  },
3184
  {
3185
  "epoch": 2.02,
3186
  "learning_rate": 1.628498727735369e-05,
3187
- "loss": 1.4627,
3188
  "step": 530
3189
  },
3190
  {
3191
  "epoch": 2.03,
3192
  "learning_rate": 1.6221374045801527e-05,
3193
- "loss": 1.4694,
3194
  "step": 531
3195
  },
3196
  {
3197
  "epoch": 2.03,
3198
  "learning_rate": 1.6157760814249364e-05,
3199
- "loss": 1.5166,
3200
  "step": 532
3201
  },
3202
  {
3203
  "epoch": 2.03,
3204
  "learning_rate": 1.60941475826972e-05,
3205
- "loss": 1.5286,
3206
  "step": 533
3207
  },
3208
  {
3209
  "epoch": 2.04,
3210
  "learning_rate": 1.6030534351145038e-05,
3211
- "loss": 1.3843,
3212
  "step": 534
3213
  },
3214
  {
3215
  "epoch": 2.04,
3216
  "learning_rate": 1.5966921119592875e-05,
3217
- "loss": 1.496,
3218
  "step": 535
3219
  },
3220
  {
3221
  "epoch": 2.05,
3222
  "learning_rate": 1.5903307888040712e-05,
3223
- "loss": 1.429,
3224
  "step": 536
3225
  },
3226
  {
3227
  "epoch": 2.05,
3228
  "learning_rate": 1.583969465648855e-05,
3229
- "loss": 1.5571,
3230
  "step": 537
3231
  },
3232
  {
3233
  "epoch": 2.05,
3234
  "learning_rate": 1.5776081424936386e-05,
3235
- "loss": 1.6211,
3236
  "step": 538
3237
  },
3238
  {
3239
  "epoch": 2.06,
3240
  "learning_rate": 1.5712468193384224e-05,
3241
- "loss": 1.4718,
3242
  "step": 539
3243
  },
3244
  {
3245
  "epoch": 2.06,
3246
  "learning_rate": 1.5648854961832064e-05,
3247
- "loss": 1.6864,
3248
  "step": 540
3249
  },
3250
  {
3251
  "epoch": 2.06,
3252
  "learning_rate": 1.5585241730279898e-05,
3253
- "loss": 1.5174,
3254
  "step": 541
3255
  },
3256
  {
3257
  "epoch": 2.07,
3258
  "learning_rate": 1.5521628498727735e-05,
3259
- "loss": 1.629,
3260
  "step": 542
3261
  },
3262
  {
3263
  "epoch": 2.07,
3264
  "learning_rate": 1.5458015267175575e-05,
3265
- "loss": 1.6602,
3266
  "step": 543
3267
  },
3268
  {
3269
  "epoch": 2.08,
3270
  "learning_rate": 1.539440203562341e-05,
3271
- "loss": 1.5541,
3272
  "step": 544
3273
  },
3274
  {
3275
  "epoch": 2.08,
3276
  "learning_rate": 1.5330788804071246e-05,
3277
- "loss": 1.5774,
3278
  "step": 545
3279
  },
3280
  {
3281
  "epoch": 2.08,
3282
  "learning_rate": 1.5267175572519086e-05,
3283
- "loss": 1.4806,
3284
  "step": 546
3285
  },
3286
  {
3287
  "epoch": 2.09,
3288
  "learning_rate": 1.5203562340966923e-05,
3289
- "loss": 1.4604,
3290
  "step": 547
3291
  },
3292
  {
3293
  "epoch": 2.09,
3294
  "learning_rate": 1.5139949109414759e-05,
3295
- "loss": 1.533,
3296
  "step": 548
3297
  },
3298
  {
3299
  "epoch": 2.1,
3300
  "learning_rate": 1.5076335877862596e-05,
3301
- "loss": 1.5635,
3302
  "step": 549
3303
  },
3304
  {
3305
  "epoch": 2.1,
3306
  "learning_rate": 1.5012722646310435e-05,
3307
- "loss": 1.4374,
3308
  "step": 550
3309
  },
3310
  {
3311
  "epoch": 2.1,
3312
  "learning_rate": 1.494910941475827e-05,
3313
- "loss": 1.5468,
3314
  "step": 551
3315
  },
3316
  {
3317
  "epoch": 2.11,
3318
  "learning_rate": 1.4885496183206107e-05,
3319
- "loss": 1.5297,
3320
  "step": 552
3321
  },
3322
  {
3323
  "epoch": 2.11,
3324
  "learning_rate": 1.4821882951653946e-05,
3325
- "loss": 1.5429,
3326
  "step": 553
3327
  },
3328
  {
3329
  "epoch": 2.11,
3330
  "learning_rate": 1.4758269720101781e-05,
3331
- "loss": 1.5683,
3332
  "step": 554
3333
  },
3334
  {
3335
  "epoch": 2.12,
3336
  "learning_rate": 1.4694656488549618e-05,
3337
- "loss": 1.4634,
3338
  "step": 555
3339
  },
3340
  {
3341
  "epoch": 2.12,
3342
  "learning_rate": 1.4631043256997457e-05,
3343
- "loss": 1.4366,
3344
  "step": 556
3345
  },
3346
  {
3347
  "epoch": 2.13,
3348
  "learning_rate": 1.4567430025445294e-05,
3349
- "loss": 1.5887,
3350
  "step": 557
3351
  },
3352
  {
3353
  "epoch": 2.13,
3354
  "learning_rate": 1.450381679389313e-05,
3355
- "loss": 1.506,
3356
  "step": 558
3357
  },
3358
  {
3359
  "epoch": 2.13,
3360
  "learning_rate": 1.4440203562340968e-05,
3361
- "loss": 1.5557,
3362
  "step": 559
3363
  },
3364
  {
3365
  "epoch": 2.14,
3366
  "learning_rate": 1.4376590330788805e-05,
3367
- "loss": 1.3759,
3368
  "step": 560
3369
  },
3370
  {
3371
  "epoch": 2.14,
3372
  "learning_rate": 1.431297709923664e-05,
3373
- "loss": 1.5622,
3374
  "step": 561
3375
  },
3376
  {
3377
  "epoch": 2.15,
3378
  "learning_rate": 1.424936386768448e-05,
3379
- "loss": 1.6124,
3380
  "step": 562
3381
  },
3382
  {
3383
  "epoch": 2.15,
3384
  "learning_rate": 1.4185750636132317e-05,
3385
- "loss": 1.5158,
3386
  "step": 563
3387
  },
3388
  {
3389
  "epoch": 2.15,
3390
  "learning_rate": 1.4122137404580155e-05,
3391
- "loss": 1.543,
3392
  "step": 564
3393
  },
3394
  {
3395
  "epoch": 2.16,
3396
  "learning_rate": 1.4058524173027989e-05,
3397
- "loss": 1.5875,
3398
  "step": 565
3399
  },
3400
  {
3401
  "epoch": 2.16,
3402
  "learning_rate": 1.3994910941475828e-05,
3403
- "loss": 1.4717,
3404
  "step": 566
3405
  },
3406
  {
3407
  "epoch": 2.16,
3408
  "learning_rate": 1.3931297709923667e-05,
3409
- "loss": 1.532,
3410
  "step": 567
3411
  },
3412
  {
3413
  "epoch": 2.17,
3414
  "learning_rate": 1.38676844783715e-05,
3415
- "loss": 1.4993,
3416
  "step": 568
3417
  },
3418
  {
3419
  "epoch": 2.17,
3420
  "learning_rate": 1.3804071246819339e-05,
3421
- "loss": 1.494,
3422
  "step": 569
3423
  },
3424
  {
3425
  "epoch": 2.18,
3426
  "learning_rate": 1.3740458015267178e-05,
3427
- "loss": 1.4785,
3428
  "step": 570
3429
  },
3430
  {
3431
  "epoch": 2.18,
3432
  "learning_rate": 1.3676844783715011e-05,
3433
- "loss": 1.524,
3434
  "step": 571
3435
  },
3436
  {
3437
  "epoch": 2.18,
3438
  "learning_rate": 1.361323155216285e-05,
3439
- "loss": 1.4963,
3440
  "step": 572
3441
  },
3442
  {
3443
  "epoch": 2.19,
3444
  "learning_rate": 1.3549618320610687e-05,
3445
- "loss": 1.6755,
3446
  "step": 573
3447
  },
3448
  {
3449
  "epoch": 2.19,
3450
  "learning_rate": 1.3486005089058526e-05,
3451
- "loss": 1.553,
3452
  "step": 574
3453
  },
3454
  {
3455
  "epoch": 2.19,
3456
  "learning_rate": 1.3422391857506361e-05,
3457
- "loss": 1.5205,
3458
  "step": 575
3459
  },
3460
  {
3461
  "epoch": 2.2,
3462
  "learning_rate": 1.3358778625954198e-05,
3463
- "loss": 1.5081,
3464
  "step": 576
3465
  },
3466
  {
3467
  "epoch": 2.2,
3468
  "learning_rate": 1.3295165394402037e-05,
3469
- "loss": 1.5934,
3470
  "step": 577
3471
  },
3472
  {
3473
  "epoch": 2.21,
3474
  "learning_rate": 1.3231552162849873e-05,
3475
- "loss": 1.6574,
3476
  "step": 578
3477
  },
3478
  {
3479
  "epoch": 2.21,
3480
  "learning_rate": 1.316793893129771e-05,
3481
- "loss": 1.5433,
3482
  "step": 579
3483
  },
3484
  {
3485
  "epoch": 2.21,
3486
  "learning_rate": 1.3104325699745548e-05,
3487
- "loss": 1.5191,
3488
  "step": 580
3489
  },
3490
  {
3491
  "epoch": 2.22,
3492
  "learning_rate": 1.3040712468193386e-05,
3493
- "loss": 1.566,
3494
  "step": 581
3495
  },
3496
  {
3497
  "epoch": 2.22,
3498
  "learning_rate": 1.2977099236641221e-05,
3499
- "loss": 1.5619,
3500
  "step": 582
3501
  },
3502
  {
3503
  "epoch": 2.23,
3504
  "learning_rate": 1.291348600508906e-05,
3505
- "loss": 1.6038,
3506
  "step": 583
3507
  },
3508
  {
3509
  "epoch": 2.23,
3510
  "learning_rate": 1.2849872773536897e-05,
3511
- "loss": 1.47,
3512
  "step": 584
3513
  },
3514
  {
3515
  "epoch": 2.23,
3516
  "learning_rate": 1.2786259541984732e-05,
3517
- "loss": 1.4102,
3518
  "step": 585
3519
  },
3520
  {
3521
  "epoch": 2.24,
3522
  "learning_rate": 1.2722646310432571e-05,
3523
- "loss": 1.6131,
3524
  "step": 586
3525
  },
3526
  {
3527
  "epoch": 2.24,
3528
  "learning_rate": 1.2659033078880408e-05,
3529
- "loss": 1.4676,
3530
  "step": 587
3531
  },
3532
  {
3533
  "epoch": 2.24,
3534
  "learning_rate": 1.2595419847328243e-05,
3535
- "loss": 1.5365,
3536
  "step": 588
3537
  },
3538
  {
3539
  "epoch": 2.25,
3540
  "learning_rate": 1.2531806615776082e-05,
3541
- "loss": 1.6006,
3542
  "step": 589
3543
  },
3544
  {
3545
  "epoch": 2.25,
3546
  "learning_rate": 1.2468193384223919e-05,
3547
- "loss": 1.4513,
3548
  "step": 590
3549
  },
3550
  {
3551
  "epoch": 2.26,
3552
  "learning_rate": 1.2404580152671756e-05,
3553
- "loss": 1.5827,
3554
  "step": 591
3555
  },
3556
  {
3557
  "epoch": 2.26,
3558
  "learning_rate": 1.2340966921119595e-05,
3559
- "loss": 1.5173,
3560
  "step": 592
3561
  },
3562
  {
3563
  "epoch": 2.26,
3564
  "learning_rate": 1.227735368956743e-05,
3565
- "loss": 1.504,
3566
  "step": 593
3567
  },
3568
  {
3569
  "epoch": 2.27,
3570
  "learning_rate": 1.2213740458015267e-05,
3571
- "loss": 1.474,
3572
  "step": 594
3573
  },
3574
  {
3575
  "epoch": 2.27,
3576
  "learning_rate": 1.2150127226463104e-05,
3577
- "loss": 1.6105,
3578
  "step": 595
3579
  },
3580
  {
3581
  "epoch": 2.27,
3582
  "learning_rate": 1.2086513994910942e-05,
3583
- "loss": 1.5222,
3584
  "step": 596
3585
  },
3586
  {
3587
  "epoch": 2.28,
3588
  "learning_rate": 1.202290076335878e-05,
3589
- "loss": 1.5107,
3590
  "step": 597
3591
  },
3592
  {
3593
  "epoch": 2.28,
3594
  "learning_rate": 1.1959287531806616e-05,
3595
- "loss": 1.4677,
3596
  "step": 598
3597
  },
3598
  {
3599
  "epoch": 2.29,
3600
  "learning_rate": 1.1895674300254453e-05,
3601
- "loss": 1.4446,
3602
  "step": 599
3603
  },
3604
  {
3605
  "epoch": 2.29,
3606
  "learning_rate": 1.1832061068702292e-05,
3607
- "loss": 1.5138,
3608
  "step": 600
3609
  },
3610
  {
3611
  "epoch": 2.29,
3612
  "learning_rate": 1.1768447837150127e-05,
3613
- "loss": 1.4841,
3614
  "step": 601
3615
  },
3616
  {
3617
  "epoch": 2.3,
3618
  "learning_rate": 1.1704834605597966e-05,
3619
- "loss": 1.526,
3620
  "step": 602
3621
  },
3622
  {
3623
  "epoch": 2.3,
3624
  "learning_rate": 1.1641221374045803e-05,
3625
- "loss": 1.4385,
3626
  "step": 603
3627
  },
3628
  {
3629
  "epoch": 2.31,
3630
  "learning_rate": 1.1577608142493638e-05,
3631
- "loss": 1.5106,
3632
  "step": 604
3633
  },
3634
  {
3635
  "epoch": 2.31,
3636
  "learning_rate": 1.1513994910941477e-05,
3637
- "loss": 1.4731,
3638
  "step": 605
3639
  },
3640
  {
3641
  "epoch": 2.31,
3642
  "learning_rate": 1.1450381679389314e-05,
3643
- "loss": 1.4147,
3644
  "step": 606
3645
  },
3646
  {
3647
  "epoch": 2.32,
3648
  "learning_rate": 1.1386768447837151e-05,
3649
- "loss": 1.5427,
3650
  "step": 607
3651
  },
3652
  {
3653
  "epoch": 2.32,
3654
  "learning_rate": 1.1323155216284988e-05,
3655
- "loss": 1.5735,
3656
  "step": 608
3657
  },
3658
  {
3659
  "epoch": 2.32,
3660
  "learning_rate": 1.1259541984732823e-05,
3661
- "loss": 1.5151,
3662
  "step": 609
3663
  },
3664
  {
3665
  "epoch": 2.33,
3666
  "learning_rate": 1.1195928753180662e-05,
3667
- "loss": 1.4713,
3668
  "step": 610
3669
  },
3670
  {
3671
  "epoch": 2.33,
3672
  "learning_rate": 1.11323155216285e-05,
3673
- "loss": 1.5663,
3674
  "step": 611
3675
  },
3676
  {
3677
  "epoch": 2.34,
3678
  "learning_rate": 1.1068702290076336e-05,
3679
- "loss": 1.4052,
3680
  "step": 612
3681
  },
3682
  {
3683
  "epoch": 2.34,
3684
  "learning_rate": 1.1005089058524173e-05,
3685
- "loss": 1.5848,
3686
  "step": 613
3687
  },
3688
  {
3689
  "epoch": 2.34,
3690
  "learning_rate": 1.0941475826972012e-05,
3691
- "loss": 1.4521,
3692
  "step": 614
3693
  },
3694
  {
3695
  "epoch": 2.35,
3696
  "learning_rate": 1.0877862595419848e-05,
3697
- "loss": 1.5355,
3698
  "step": 615
3699
  },
3700
  {
3701
  "epoch": 2.35,
3702
  "learning_rate": 1.0814249363867685e-05,
3703
- "loss": 1.6279,
3704
  "step": 616
3705
  },
3706
  {
3707
  "epoch": 2.35,
3708
  "learning_rate": 1.0750636132315522e-05,
3709
- "loss": 1.4912,
3710
  "step": 617
3711
  },
3712
  {
3713
  "epoch": 2.36,
3714
  "learning_rate": 1.0687022900763359e-05,
3715
- "loss": 1.6003,
3716
  "step": 618
3717
  },
3718
  {
3719
  "epoch": 2.36,
3720
  "learning_rate": 1.0623409669211198e-05,
3721
- "loss": 1.4327,
3722
  "step": 619
3723
  },
3724
  {
3725
  "epoch": 2.37,
3726
  "learning_rate": 1.0559796437659033e-05,
3727
- "loss": 1.4725,
3728
  "step": 620
3729
  },
3730
  {
3731
  "epoch": 2.37,
3732
  "learning_rate": 1.049618320610687e-05,
3733
- "loss": 1.5927,
3734
  "step": 621
3735
  },
3736
  {
3737
  "epoch": 2.37,
3738
  "learning_rate": 1.0432569974554709e-05,
3739
- "loss": 1.4698,
3740
  "step": 622
3741
  },
3742
  {
3743
  "epoch": 2.38,
3744
  "learning_rate": 1.0368956743002544e-05,
3745
- "loss": 1.568,
3746
  "step": 623
3747
  },
3748
  {
3749
  "epoch": 2.38,
3750
  "learning_rate": 1.0305343511450383e-05,
3751
- "loss": 1.5145,
3752
  "step": 624
3753
  },
3754
  {
3755
  "epoch": 2.39,
3756
  "learning_rate": 1.024173027989822e-05,
3757
- "loss": 1.479,
3758
  "step": 625
3759
  },
3760
  {
3761
  "epoch": 2.39,
3762
  "learning_rate": 1.0178117048346055e-05,
3763
- "loss": 1.5341,
3764
  "step": 626
3765
  },
3766
  {
3767
  "epoch": 2.39,
3768
  "learning_rate": 1.0114503816793894e-05,
3769
- "loss": 1.455,
3770
  "step": 627
3771
  },
3772
  {
3773
  "epoch": 2.4,
3774
  "learning_rate": 1.0050890585241731e-05,
3775
- "loss": 1.5176,
3776
  "step": 628
3777
  },
3778
  {
3779
  "epoch": 2.4,
3780
  "learning_rate": 9.987277353689568e-06,
3781
- "loss": 1.5831,
3782
  "step": 629
3783
  },
3784
  {
3785
  "epoch": 2.4,
3786
  "learning_rate": 9.923664122137405e-06,
3787
- "loss": 1.6207,
3788
  "step": 630
3789
  },
3790
  {
3791
  "epoch": 2.41,
3792
  "learning_rate": 9.860050890585242e-06,
3793
- "loss": 1.5222,
3794
  "step": 631
3795
  },
3796
  {
3797
  "epoch": 2.41,
3798
  "learning_rate": 9.79643765903308e-06,
3799
- "loss": 1.4343,
3800
  "step": 632
3801
  },
3802
  {
3803
  "epoch": 2.42,
3804
  "learning_rate": 9.732824427480917e-06,
3805
- "loss": 1.4709,
3806
  "step": 633
3807
  },
3808
  {
3809
  "epoch": 2.42,
3810
  "learning_rate": 9.669211195928754e-06,
3811
- "loss": 1.5012,
3812
  "step": 634
3813
  },
3814
  {
3815
  "epoch": 2.42,
3816
  "learning_rate": 9.60559796437659e-06,
3817
- "loss": 1.4925,
3818
  "step": 635
3819
  },
3820
  {
3821
  "epoch": 2.43,
3822
  "learning_rate": 9.541984732824428e-06,
3823
- "loss": 1.4522,
3824
  "step": 636
3825
  },
3826
  {
3827
  "epoch": 2.43,
3828
  "learning_rate": 9.478371501272265e-06,
3829
- "loss": 1.513,
3830
  "step": 637
3831
  },
3832
  {
3833
  "epoch": 2.44,
3834
  "learning_rate": 9.414758269720102e-06,
3835
- "loss": 1.459,
3836
  "step": 638
3837
  },
3838
  {
3839
  "epoch": 2.44,
3840
  "learning_rate": 9.351145038167939e-06,
3841
- "loss": 1.4932,
3842
  "step": 639
3843
  },
3844
  {
3845
  "epoch": 2.44,
3846
  "learning_rate": 9.287531806615776e-06,
3847
- "loss": 1.4743,
3848
  "step": 640
3849
  },
3850
  {
3851
  "epoch": 2.45,
3852
  "learning_rate": 9.223918575063615e-06,
3853
- "loss": 1.522,
3854
  "step": 641
3855
  },
3856
  {
3857
  "epoch": 2.45,
3858
  "learning_rate": 9.16030534351145e-06,
3859
- "loss": 1.593,
3860
  "step": 642
3861
  },
3862
  {
3863
  "epoch": 2.45,
3864
  "learning_rate": 9.096692111959287e-06,
3865
- "loss": 1.6271,
3866
  "step": 643
3867
  },
3868
  {
3869
  "epoch": 2.46,
3870
  "learning_rate": 9.033078880407126e-06,
3871
- "loss": 1.5129,
3872
  "step": 644
3873
  },
3874
  {
3875
  "epoch": 2.46,
3876
  "learning_rate": 8.969465648854961e-06,
3877
- "loss": 1.4341,
3878
  "step": 645
3879
  },
3880
  {
3881
  "epoch": 2.47,
3882
  "learning_rate": 8.9058524173028e-06,
3883
- "loss": 1.5421,
3884
  "step": 646
3885
  },
3886
  {
3887
  "epoch": 2.47,
3888
  "learning_rate": 8.842239185750637e-06,
3889
- "loss": 1.5236,
3890
  "step": 647
3891
  },
3892
  {
3893
  "epoch": 2.47,
3894
  "learning_rate": 8.778625954198473e-06,
3895
- "loss": 1.5255,
3896
  "step": 648
3897
  },
3898
  {
3899
  "epoch": 2.48,
3900
  "learning_rate": 8.715012722646311e-06,
3901
- "loss": 1.4372,
3902
  "step": 649
3903
  },
3904
  {
3905
  "epoch": 2.48,
3906
  "learning_rate": 8.651399491094148e-06,
3907
- "loss": 1.5087,
3908
  "step": 650
3909
  },
3910
  {
3911
  "epoch": 2.48,
3912
  "learning_rate": 8.587786259541985e-06,
3913
- "loss": 1.5081,
3914
  "step": 651
3915
  },
3916
  {
3917
  "epoch": 2.49,
3918
  "learning_rate": 8.524173027989823e-06,
3919
- "loss": 1.4761,
3920
  "step": 652
3921
  },
3922
  {
3923
  "epoch": 2.49,
3924
  "learning_rate": 8.46055979643766e-06,
3925
- "loss": 1.4252,
3926
  "step": 653
3927
  },
3928
  {
3929
  "epoch": 2.5,
3930
  "learning_rate": 8.396946564885497e-06,
3931
- "loss": 1.5772,
3932
  "step": 654
3933
  },
3934
  {
3935
  "epoch": 2.5,
3936
  "learning_rate": 8.333333333333334e-06,
3937
- "loss": 1.3853,
3938
  "step": 655
3939
  },
3940
  {
3941
  "epoch": 2.5,
3942
  "learning_rate": 8.26972010178117e-06,
3943
- "loss": 1.5134,
3944
  "step": 656
3945
  },
3946
  {
3947
  "epoch": 2.51,
3948
  "learning_rate": 8.206106870229008e-06,
3949
- "loss": 1.4493,
3950
  "step": 657
3951
  },
3952
  {
3953
  "epoch": 2.51,
3954
  "learning_rate": 8.142493638676845e-06,
3955
- "loss": 1.4715,
3956
  "step": 658
3957
  },
3958
  {
3959
  "epoch": 2.52,
3960
  "learning_rate": 8.078880407124682e-06,
3961
- "loss": 1.5163,
3962
  "step": 659
3963
  },
3964
  {
3965
  "epoch": 2.52,
3966
  "learning_rate": 8.015267175572519e-06,
3967
- "loss": 1.5462,
3968
  "step": 660
3969
  },
3970
  {
3971
  "epoch": 2.52,
3972
  "learning_rate": 7.951653944020356e-06,
3973
- "loss": 1.6523,
3974
  "step": 661
3975
  },
3976
  {
3977
  "epoch": 2.53,
3978
  "learning_rate": 7.888040712468193e-06,
3979
- "loss": 1.6249,
3980
  "step": 662
3981
  },
3982
  {
3983
  "epoch": 2.53,
3984
  "learning_rate": 7.824427480916032e-06,
3985
- "loss": 1.6846,
3986
  "step": 663
3987
  },
3988
  {
3989
  "epoch": 2.53,
3990
  "learning_rate": 7.760814249363867e-06,
3991
- "loss": 1.6154,
3992
  "step": 664
3993
  },
3994
  {
3995
  "epoch": 2.54,
3996
  "learning_rate": 7.697201017811704e-06,
3997
- "loss": 1.6876,
3998
  "step": 665
3999
  },
4000
  {
4001
  "epoch": 2.54,
4002
  "learning_rate": 7.633587786259543e-06,
4003
- "loss": 1.506,
4004
  "step": 666
4005
  },
4006
  {
4007
  "epoch": 2.55,
4008
  "learning_rate": 7.569974554707379e-06,
4009
- "loss": 1.5859,
4010
  "step": 667
4011
  },
4012
  {
4013
  "epoch": 2.55,
4014
  "learning_rate": 7.506361323155217e-06,
4015
- "loss": 1.5522,
4016
  "step": 668
4017
  },
4018
  {
4019
  "epoch": 2.55,
4020
  "learning_rate": 7.4427480916030536e-06,
4021
- "loss": 1.6109,
4022
  "step": 669
4023
  },
4024
  {
4025
  "epoch": 2.56,
4026
  "learning_rate": 7.379134860050891e-06,
4027
- "loss": 1.5327,
4028
  "step": 670
4029
  },
4030
  {
4031
  "epoch": 2.56,
4032
  "learning_rate": 7.3155216284987285e-06,
4033
- "loss": 1.4753,
4034
  "step": 671
4035
  },
4036
  {
4037
  "epoch": 2.56,
4038
  "learning_rate": 7.251908396946565e-06,
4039
- "loss": 1.5325,
4040
  "step": 672
4041
  },
4042
  {
4043
  "epoch": 2.57,
4044
  "learning_rate": 7.188295165394403e-06,
4045
- "loss": 1.5384,
4046
  "step": 673
4047
  },
4048
  {
4049
  "epoch": 2.57,
4050
  "learning_rate": 7.12468193384224e-06,
4051
- "loss": 1.5745,
4052
  "step": 674
4053
  },
4054
  {
4055
  "epoch": 2.58,
4056
  "learning_rate": 7.061068702290078e-06,
4057
- "loss": 1.4398,
4058
  "step": 675
4059
  },
4060
  {
4061
  "epoch": 2.58,
4062
  "learning_rate": 6.997455470737914e-06,
4063
- "loss": 1.4848,
4064
  "step": 676
4065
  },
4066
  {
4067
  "epoch": 2.58,
4068
  "learning_rate": 6.93384223918575e-06,
4069
- "loss": 1.3778,
4070
  "step": 677
4071
  },
4072
  {
4073
  "epoch": 2.59,
4074
  "learning_rate": 6.870229007633589e-06,
4075
- "loss": 1.5155,
4076
  "step": 678
4077
  },
4078
  {
4079
  "epoch": 2.59,
4080
  "learning_rate": 6.806615776081425e-06,
4081
- "loss": 1.5289,
4082
  "step": 679
4083
  },
4084
  {
4085
  "epoch": 2.6,
4086
  "learning_rate": 6.743002544529263e-06,
4087
- "loss": 1.5638,
4088
  "step": 680
4089
  },
4090
  {
4091
  "epoch": 2.6,
4092
  "learning_rate": 6.679389312977099e-06,
4093
- "loss": 1.3992,
4094
  "step": 681
4095
  },
4096
  {
4097
  "epoch": 2.6,
4098
  "learning_rate": 6.615776081424936e-06,
4099
- "loss": 1.5364,
4100
  "step": 682
4101
  },
4102
  {
4103
  "epoch": 2.61,
4104
  "learning_rate": 6.552162849872774e-06,
4105
- "loss": 1.5832,
4106
  "step": 683
4107
  },
4108
  {
4109
  "epoch": 2.61,
4110
  "learning_rate": 6.4885496183206104e-06,
4111
- "loss": 1.4741,
4112
  "step": 684
4113
  },
4114
  {
4115
  "epoch": 2.61,
4116
  "learning_rate": 6.424936386768448e-06,
4117
- "loss": 1.5595,
4118
  "step": 685
4119
  },
4120
  {
4121
  "epoch": 2.62,
4122
  "learning_rate": 6.3613231552162854e-06,
4123
- "loss": 1.5912,
4124
  "step": 686
4125
  },
4126
  {
4127
  "epoch": 2.62,
4128
  "learning_rate": 6.297709923664122e-06,
4129
- "loss": 1.5435,
4130
  "step": 687
4131
  },
4132
  {
4133
  "epoch": 2.63,
4134
  "learning_rate": 6.2340966921119596e-06,
4135
- "loss": 1.4681,
4136
  "step": 688
4137
  },
4138
  {
4139
  "epoch": 2.63,
4140
  "learning_rate": 6.1704834605597975e-06,
4141
- "loss": 1.4535,
4142
  "step": 689
4143
  },
4144
  {
4145
  "epoch": 2.63,
4146
  "learning_rate": 6.106870229007634e-06,
4147
- "loss": 1.575,
4148
  "step": 690
4149
  },
4150
  {
4151
  "epoch": 2.64,
4152
  "learning_rate": 6.043256997455471e-06,
4153
- "loss": 1.4966,
4154
  "step": 691
4155
  },
4156
  {
4157
  "epoch": 2.64,
4158
  "learning_rate": 5.979643765903308e-06,
4159
- "loss": 1.5858,
4160
  "step": 692
4161
  },
4162
  {
4163
  "epoch": 2.65,
4164
  "learning_rate": 5.916030534351146e-06,
4165
- "loss": 1.4563,
4166
  "step": 693
4167
  },
4168
  {
4169
  "epoch": 2.65,
4170
  "learning_rate": 5.852417302798983e-06,
4171
- "loss": 1.4964,
4172
  "step": 694
4173
  },
4174
  {
4175
  "epoch": 2.65,
4176
  "learning_rate": 5.788804071246819e-06,
4177
- "loss": 1.5931,
4178
  "step": 695
4179
  },
4180
  {
4181
  "epoch": 2.66,
4182
  "learning_rate": 5.725190839694657e-06,
4183
- "loss": 1.3864,
4184
  "step": 696
4185
  },
4186
  {
4187
  "epoch": 2.66,
4188
  "learning_rate": 5.661577608142494e-06,
4189
- "loss": 1.422,
4190
  "step": 697
4191
  },
4192
  {
4193
  "epoch": 2.66,
4194
  "learning_rate": 5.597964376590331e-06,
4195
- "loss": 1.5293,
4196
  "step": 698
4197
  },
4198
  {
4199
  "epoch": 2.67,
4200
  "learning_rate": 5.534351145038168e-06,
4201
- "loss": 1.4404,
4202
  "step": 699
4203
  },
4204
  {
4205
  "epoch": 2.67,
4206
  "learning_rate": 5.470737913486006e-06,
4207
- "loss": 1.475,
4208
  "step": 700
4209
  },
4210
  {
4211
  "epoch": 2.68,
4212
  "learning_rate": 5.407124681933842e-06,
4213
- "loss": 1.4067,
4214
  "step": 701
4215
  },
4216
  {
4217
  "epoch": 2.68,
4218
  "learning_rate": 5.343511450381679e-06,
4219
- "loss": 1.4764,
4220
  "step": 702
4221
  },
4222
  {
4223
  "epoch": 2.68,
4224
  "learning_rate": 5.2798982188295165e-06,
4225
- "loss": 1.5613,
4226
  "step": 703
4227
  },
4228
  {
4229
  "epoch": 2.69,
4230
  "learning_rate": 5.216284987277354e-06,
4231
- "loss": 1.5702,
4232
  "step": 704
4233
  },
4234
  {
4235
  "epoch": 2.69,
4236
  "learning_rate": 5.1526717557251914e-06,
4237
- "loss": 1.4421,
4238
  "step": 705
4239
  },
4240
  {
4241
  "epoch": 2.69,
4242
  "learning_rate": 5.089058524173028e-06,
4243
- "loss": 1.5176,
4244
  "step": 706
4245
  },
4246
  {
4247
  "epoch": 2.7,
4248
  "learning_rate": 5.025445292620866e-06,
4249
- "loss": 1.5791,
4250
  "step": 707
4251
  },
4252
  {
4253
  "epoch": 2.7,
4254
  "learning_rate": 4.961832061068703e-06,
4255
- "loss": 1.6155,
4256
  "step": 708
4257
  },
4258
  {
4259
  "epoch": 2.71,
4260
  "learning_rate": 4.89821882951654e-06,
4261
- "loss": 1.3407,
4262
  "step": 709
4263
  },
4264
  {
4265
  "epoch": 2.71,
4266
  "learning_rate": 4.834605597964377e-06,
4267
- "loss": 1.5376,
4268
  "step": 710
4269
  },
4270
  {
4271
  "epoch": 2.71,
4272
  "learning_rate": 4.770992366412214e-06,
4273
- "loss": 1.4459,
4274
  "step": 711
4275
  },
4276
  {
4277
  "epoch": 2.72,
4278
  "learning_rate": 4.707379134860051e-06,
4279
- "loss": 1.4462,
4280
  "step": 712
4281
  },
4282
  {
4283
  "epoch": 2.72,
4284
  "learning_rate": 4.643765903307888e-06,
4285
- "loss": 1.4415,
4286
  "step": 713
4287
  },
4288
  {
4289
  "epoch": 2.73,
4290
  "learning_rate": 4.580152671755725e-06,
4291
- "loss": 1.4233,
4292
  "step": 714
4293
  },
4294
  {
4295
  "epoch": 2.73,
4296
  "learning_rate": 4.516539440203563e-06,
4297
- "loss": 1.5232,
4298
  "step": 715
4299
  },
4300
  {
4301
  "epoch": 2.73,
4302
  "learning_rate": 4.4529262086514e-06,
4303
- "loss": 1.4517,
4304
  "step": 716
4305
  },
4306
  {
4307
  "epoch": 2.74,
4308
  "learning_rate": 4.389312977099236e-06,
4309
- "loss": 1.4326,
4310
  "step": 717
4311
  },
4312
  {
4313
  "epoch": 2.74,
4314
  "learning_rate": 4.325699745547074e-06,
4315
- "loss": 1.527,
4316
  "step": 718
4317
  },
4318
  {
4319
  "epoch": 2.74,
4320
  "learning_rate": 4.262086513994911e-06,
4321
- "loss": 1.5301,
4322
  "step": 719
4323
  },
4324
  {
4325
  "epoch": 2.75,
4326
  "learning_rate": 4.198473282442748e-06,
4327
- "loss": 1.5043,
4328
  "step": 720
4329
  },
4330
  {
4331
  "epoch": 2.75,
4332
  "learning_rate": 4.134860050890585e-06,
4333
- "loss": 1.549,
4334
  "step": 721
4335
  },
4336
  {
4337
  "epoch": 2.76,
4338
  "learning_rate": 4.0712468193384225e-06,
4339
- "loss": 1.5779,
4340
  "step": 722
4341
  },
4342
  {
4343
  "epoch": 2.76,
4344
  "learning_rate": 4.0076335877862595e-06,
4345
- "loss": 1.437,
4346
  "step": 723
4347
  },
4348
  {
4349
  "epoch": 2.76,
4350
  "learning_rate": 3.944020356234097e-06,
4351
- "loss": 1.5213,
4352
  "step": 724
4353
  },
4354
  {
4355
  "epoch": 2.77,
4356
  "learning_rate": 3.880407124681934e-06,
4357
- "loss": 1.4216,
4358
  "step": 725
4359
  },
4360
  {
4361
  "epoch": 2.77,
4362
  "learning_rate": 3.816793893129772e-06,
4363
- "loss": 1.4927,
4364
  "step": 726
4365
  },
4366
  {
4367
  "epoch": 2.77,
4368
  "learning_rate": 3.7531806615776087e-06,
4369
- "loss": 1.4148,
4370
  "step": 727
4371
  },
4372
  {
4373
  "epoch": 2.78,
4374
  "learning_rate": 3.6895674300254453e-06,
4375
- "loss": 1.5635,
4376
  "step": 728
4377
  },
4378
  {
4379
  "epoch": 2.78,
4380
  "learning_rate": 3.6259541984732824e-06,
4381
- "loss": 1.5999,
4382
  "step": 729
4383
  },
4384
  {
4385
  "epoch": 2.79,
4386
  "learning_rate": 3.56234096692112e-06,
4387
- "loss": 1.5413,
4388
  "step": 730
4389
  },
4390
  {
4391
  "epoch": 2.79,
4392
  "learning_rate": 3.498727735368957e-06,
4393
- "loss": 1.6005,
4394
  "step": 731
4395
  },
4396
  {
4397
  "epoch": 2.79,
4398
  "learning_rate": 3.4351145038167944e-06,
4399
- "loss": 1.3934,
4400
  "step": 732
4401
  },
4402
  {
4403
  "epoch": 2.8,
4404
  "learning_rate": 3.3715012722646315e-06,
4405
- "loss": 1.4354,
4406
  "step": 733
4407
  },
4408
  {
4409
  "epoch": 2.8,
4410
  "learning_rate": 3.307888040712468e-06,
4411
- "loss": 1.4843,
4412
  "step": 734
4413
  },
4414
  {
4415
  "epoch": 2.81,
4416
  "learning_rate": 3.2442748091603052e-06,
4417
- "loss": 1.4673,
4418
  "step": 735
4419
  },
4420
  {
4421
  "epoch": 2.81,
4422
  "learning_rate": 3.1806615776081427e-06,
4423
- "loss": 1.4752,
4424
  "step": 736
4425
  },
4426
  {
4427
  "epoch": 2.81,
4428
  "learning_rate": 3.1170483460559798e-06,
4429
- "loss": 1.6207,
4430
  "step": 737
4431
  },
4432
  {
4433
  "epoch": 2.82,
4434
  "learning_rate": 3.053435114503817e-06,
4435
- "loss": 1.5183,
4436
  "step": 738
4437
  },
4438
  {
4439
  "epoch": 2.82,
4440
  "learning_rate": 2.989821882951654e-06,
4441
- "loss": 1.5892,
4442
  "step": 739
4443
  },
4444
  {
4445
  "epoch": 2.82,
4446
  "learning_rate": 2.9262086513994914e-06,
4447
- "loss": 1.5019,
4448
  "step": 740
4449
  },
4450
  {
4451
  "epoch": 2.83,
4452
  "learning_rate": 2.8625954198473285e-06,
4453
- "loss": 1.4875,
4454
  "step": 741
4455
  },
4456
  {
4457
  "epoch": 2.83,
4458
  "learning_rate": 2.7989821882951656e-06,
4459
- "loss": 1.4715,
4460
  "step": 742
4461
  },
4462
  {
4463
  "epoch": 2.84,
4464
  "learning_rate": 2.735368956743003e-06,
4465
- "loss": 1.4148,
4466
  "step": 743
4467
  },
4468
  {
4469
  "epoch": 2.84,
4470
  "learning_rate": 2.6717557251908397e-06,
4471
- "loss": 1.5031,
4472
  "step": 744
4473
  },
4474
  {
4475
  "epoch": 2.84,
4476
  "learning_rate": 2.608142493638677e-06,
4477
- "loss": 1.5024,
4478
  "step": 745
4479
  },
4480
  {
4481
  "epoch": 2.85,
4482
  "learning_rate": 2.544529262086514e-06,
4483
- "loss": 1.4872,
4484
  "step": 746
4485
  },
4486
  {
4487
  "epoch": 2.85,
4488
  "learning_rate": 2.4809160305343513e-06,
4489
- "loss": 1.5258,
4490
  "step": 747
4491
  },
4492
  {
4493
  "epoch": 2.85,
4494
  "learning_rate": 2.4173027989821884e-06,
4495
- "loss": 1.4141,
4496
  "step": 748
4497
  },
4498
  {
4499
  "epoch": 2.86,
4500
  "learning_rate": 2.3536895674300255e-06,
4501
- "loss": 1.5452,
4502
  "step": 749
4503
  },
4504
  {
4505
  "epoch": 2.86,
4506
  "learning_rate": 2.2900763358778625e-06,
4507
- "loss": 1.4944,
4508
  "step": 750
4509
  },
4510
  {
4511
  "epoch": 2.87,
4512
  "learning_rate": 2.2264631043257e-06,
4513
- "loss": 1.4778,
4514
  "step": 751
4515
  },
4516
  {
4517
  "epoch": 2.87,
4518
  "learning_rate": 2.162849872773537e-06,
4519
- "loss": 1.4943,
4520
  "step": 752
4521
  },
4522
  {
4523
  "epoch": 2.87,
4524
  "learning_rate": 2.099236641221374e-06,
4525
- "loss": 1.5323,
4526
  "step": 753
4527
  },
4528
  {
4529
  "epoch": 2.88,
4530
  "learning_rate": 2.0356234096692112e-06,
4531
- "loss": 1.4696,
4532
  "step": 754
4533
  },
4534
  {
4535
  "epoch": 2.88,
4536
  "learning_rate": 1.9720101781170483e-06,
4537
- "loss": 1.4476,
4538
  "step": 755
4539
  },
4540
  {
4541
  "epoch": 2.89,
4542
  "learning_rate": 1.908396946564886e-06,
4543
- "loss": 1.4159,
4544
  "step": 756
4545
  },
4546
  {
4547
  "epoch": 2.89,
4548
  "learning_rate": 1.8447837150127227e-06,
4549
- "loss": 1.5014,
4550
  "step": 757
4551
  },
4552
  {
4553
  "epoch": 2.89,
4554
  "learning_rate": 1.78117048346056e-06,
4555
- "loss": 1.5421,
4556
  "step": 758
4557
  },
4558
  {
4559
  "epoch": 2.9,
4560
  "learning_rate": 1.7175572519083972e-06,
4561
- "loss": 1.499,
4562
  "step": 759
4563
  },
4564
  {
4565
  "epoch": 2.9,
4566
  "learning_rate": 1.653944020356234e-06,
4567
- "loss": 1.5418,
4568
  "step": 760
4569
  },
4570
  {
4571
  "epoch": 2.9,
4572
  "learning_rate": 1.5903307888040714e-06,
4573
- "loss": 1.3879,
4574
  "step": 761
4575
  },
4576
  {
4577
  "epoch": 2.91,
4578
  "learning_rate": 1.5267175572519084e-06,
4579
- "loss": 1.5063,
4580
  "step": 762
4581
  },
4582
  {
4583
  "epoch": 2.91,
4584
  "learning_rate": 1.4631043256997457e-06,
4585
- "loss": 1.4184,
4586
  "step": 763
4587
  },
4588
  {
4589
  "epoch": 2.92,
4590
  "learning_rate": 1.3994910941475828e-06,
4591
- "loss": 1.4975,
4592
  "step": 764
4593
  },
4594
  {
4595
  "epoch": 2.92,
4596
  "learning_rate": 1.3358778625954198e-06,
4597
- "loss": 1.4759,
4598
  "step": 765
4599
  },
4600
  {
4601
  "epoch": 2.92,
4602
  "learning_rate": 1.272264631043257e-06,
4603
- "loss": 1.5125,
4604
  "step": 766
4605
  },
4606
  {
4607
  "epoch": 2.93,
4608
  "learning_rate": 1.2086513994910942e-06,
4609
- "loss": 1.462,
4610
  "step": 767
4611
  },
4612
  {
4613
  "epoch": 2.93,
4614
  "learning_rate": 1.1450381679389313e-06,
4615
- "loss": 1.4025,
4616
  "step": 768
4617
  },
4618
  {
4619
  "epoch": 2.94,
4620
  "learning_rate": 1.0814249363867685e-06,
4621
- "loss": 1.4899,
4622
  "step": 769
4623
  },
4624
  {
4625
  "epoch": 2.94,
4626
  "learning_rate": 1.0178117048346056e-06,
4627
- "loss": 1.403,
4628
  "step": 770
4629
  },
4630
  {
4631
  "epoch": 2.94,
4632
  "learning_rate": 9.54198473282443e-07,
4633
- "loss": 1.4073,
4634
  "step": 771
4635
  },
4636
  {
4637
  "epoch": 2.95,
4638
  "learning_rate": 8.9058524173028e-07,
4639
- "loss": 1.4358,
4640
  "step": 772
4641
  },
4642
  {
4643
  "epoch": 2.95,
4644
  "learning_rate": 8.26972010178117e-07,
4645
- "loss": 1.6034,
4646
  "step": 773
4647
  },
4648
  {
4649
  "epoch": 2.95,
4650
  "learning_rate": 7.633587786259542e-07,
4651
- "loss": 1.6185,
4652
  "step": 774
4653
  },
4654
  {
4655
  "epoch": 2.96,
4656
  "learning_rate": 6.997455470737914e-07,
4657
- "loss": 1.4775,
4658
  "step": 775
4659
  },
4660
  {
4661
  "epoch": 2.96,
4662
  "learning_rate": 6.361323155216285e-07,
4663
- "loss": 1.4595,
4664
  "step": 776
4665
  },
4666
  {
4667
  "epoch": 2.97,
4668
  "learning_rate": 5.725190839694656e-07,
4669
- "loss": 1.4733,
4670
  "step": 777
4671
  },
4672
  {
4673
  "epoch": 2.97,
4674
  "learning_rate": 5.089058524173028e-07,
4675
- "loss": 1.4463,
4676
  "step": 778
4677
  },
4678
  {
4679
  "epoch": 2.97,
4680
  "learning_rate": 4.4529262086514e-07,
4681
- "loss": 1.5377,
4682
  "step": 779
4683
  },
4684
  {
4685
  "epoch": 2.98,
4686
  "learning_rate": 3.816793893129771e-07,
4687
- "loss": 1.5329,
4688
  "step": 780
4689
  },
4690
  {
4691
  "epoch": 2.98,
4692
  "learning_rate": 3.1806615776081423e-07,
4693
- "loss": 1.4827,
4694
  "step": 781
4695
  },
4696
  {
4697
  "epoch": 2.98,
4698
  "learning_rate": 2.544529262086514e-07,
4699
- "loss": 1.5151,
4700
  "step": 782
4701
  },
4702
  {
4703
  "epoch": 2.99,
4704
  "learning_rate": 1.9083969465648855e-07,
4705
- "loss": 1.5695,
4706
  "step": 783
4707
  },
4708
  {
4709
  "epoch": 2.99,
4710
  "learning_rate": 1.272264631043257e-07,
4711
- "loss": 1.4626,
4712
  "step": 784
4713
  },
4714
  {
4715
  "epoch": 3.0,
4716
  "learning_rate": 6.361323155216285e-08,
4717
- "loss": 1.5292,
4718
  "step": 785
4719
  },
4720
  {
4721
  "epoch": 3.0,
4722
  "learning_rate": 0.0,
4723
- "loss": 1.5321,
4724
  "step": 786
4725
  },
4726
  {
4727
  "epoch": 3.0,
4728
  "step": 786,
4729
  "total_flos": 1.2699738389348352e+16,
4730
- "train_loss": 1.6920310260382014,
4731
- "train_runtime": 827.5296,
4732
- "train_samples_per_second": 22.734,
4733
- "train_steps_per_second": 0.95
4734
  }
4735
  ],
4736
  "max_steps": 786,
 
3010
  {
3011
  "epoch": 1.91,
3012
  "learning_rate": 1.812977099236641e-05,
3013
+ "loss": 1.6102,
3014
  "step": 501
3015
  },
3016
  {
3017
  "epoch": 1.92,
3018
  "learning_rate": 1.8066157760814252e-05,
3019
+ "loss": 1.5535,
3020
  "step": 502
3021
  },
3022
  {
3023
  "epoch": 1.92,
3024
  "learning_rate": 1.800254452926209e-05,
3025
+ "loss": 1.7148,
3026
  "step": 503
3027
  },
3028
  {
3029
  "epoch": 1.92,
3030
  "learning_rate": 1.7938931297709923e-05,
3031
+ "loss": 1.4462,
3032
  "step": 504
3033
  },
3034
  {
3035
  "epoch": 1.93,
3036
  "learning_rate": 1.7875318066157763e-05,
3037
+ "loss": 1.5138,
3038
  "step": 505
3039
  },
3040
  {
3041
  "epoch": 1.93,
3042
  "learning_rate": 1.78117048346056e-05,
3043
+ "loss": 1.5897,
3044
  "step": 506
3045
  },
3046
  {
3047
  "epoch": 1.94,
3048
  "learning_rate": 1.7748091603053434e-05,
3049
+ "loss": 1.5578,
3050
  "step": 507
3051
  },
3052
  {
3053
  "epoch": 1.94,
3054
  "learning_rate": 1.7684478371501274e-05,
3055
+ "loss": 1.4859,
3056
  "step": 508
3057
  },
3058
  {
3059
  "epoch": 1.94,
3060
  "learning_rate": 1.762086513994911e-05,
3061
+ "loss": 1.5531,
3062
  "step": 509
3063
  },
3064
  {
3065
  "epoch": 1.95,
3066
  "learning_rate": 1.7557251908396945e-05,
3067
+ "loss": 1.438,
3068
  "step": 510
3069
  },
3070
  {
3071
  "epoch": 1.95,
3072
  "learning_rate": 1.7493638676844786e-05,
3073
+ "loss": 1.4883,
3074
  "step": 511
3075
  },
3076
  {
3077
  "epoch": 1.95,
3078
  "learning_rate": 1.7430025445292623e-05,
3079
+ "loss": 1.6651,
3080
  "step": 512
3081
  },
3082
  {
3083
  "epoch": 1.96,
3084
  "learning_rate": 1.736641221374046e-05,
3085
+ "loss": 1.6407,
3086
  "step": 513
3087
  },
3088
  {
3089
  "epoch": 1.96,
3090
  "learning_rate": 1.7302798982188297e-05,
3091
+ "loss": 1.5907,
3092
  "step": 514
3093
  },
3094
  {
3095
  "epoch": 1.97,
3096
  "learning_rate": 1.7239185750636134e-05,
3097
+ "loss": 1.4254,
3098
  "step": 515
3099
  },
3100
  {
3101
  "epoch": 1.97,
3102
  "learning_rate": 1.717557251908397e-05,
3103
+ "loss": 1.5548,
3104
  "step": 516
3105
  },
3106
  {
3107
  "epoch": 1.97,
3108
  "learning_rate": 1.7111959287531808e-05,
3109
+ "loss": 1.6296,
3110
  "step": 517
3111
  },
3112
  {
3113
  "epoch": 1.98,
3114
  "learning_rate": 1.7048346055979645e-05,
3115
+ "loss": 1.6281,
3116
  "step": 518
3117
  },
3118
  {
3119
  "epoch": 1.98,
3120
  "learning_rate": 1.6984732824427482e-05,
3121
+ "loss": 1.5837,
3122
  "step": 519
3123
  },
3124
  {
3125
  "epoch": 1.98,
3126
  "learning_rate": 1.692111959287532e-05,
3127
+ "loss": 1.5029,
3128
  "step": 520
3129
  },
3130
  {
3131
  "epoch": 1.99,
3132
  "learning_rate": 1.6857506361323156e-05,
3133
+ "loss": 1.523,
3134
  "step": 521
3135
  },
3136
  {
3137
  "epoch": 1.99,
3138
  "learning_rate": 1.6793893129770993e-05,
3139
+ "loss": 1.7224,
3140
  "step": 522
3141
  },
3142
  {
3143
  "epoch": 2.0,
3144
  "learning_rate": 1.673027989821883e-05,
3145
+ "loss": 1.5858,
3146
  "step": 523
3147
  },
3148
  {
3149
  "epoch": 2.0,
3150
  "learning_rate": 1.6666666666666667e-05,
3151
+ "loss": 1.5929,
3152
  "step": 524
3153
  },
3154
  {
3155
  "epoch": 2.0,
3156
  "learning_rate": 1.6603053435114505e-05,
3157
+ "loss": 1.5214,
3158
  "step": 525
3159
  },
3160
  {
3161
  "epoch": 2.01,
3162
  "learning_rate": 1.653944020356234e-05,
3163
+ "loss": 1.5286,
3164
  "step": 526
3165
  },
3166
  {
3167
  "epoch": 2.01,
3168
  "learning_rate": 1.647582697201018e-05,
3169
+ "loss": 1.6211,
3170
  "step": 527
3171
  },
3172
  {
3173
  "epoch": 2.02,
3174
  "learning_rate": 1.6412213740458016e-05,
3175
+ "loss": 1.4927,
3176
  "step": 528
3177
  },
3178
  {
3179
  "epoch": 2.02,
3180
  "learning_rate": 1.6348600508905853e-05,
3181
+ "loss": 1.6878,
3182
  "step": 529
3183
  },
3184
  {
3185
  "epoch": 2.02,
3186
  "learning_rate": 1.628498727735369e-05,
3187
+ "loss": 1.5976,
3188
  "step": 530
3189
  },
3190
  {
3191
  "epoch": 2.03,
3192
  "learning_rate": 1.6221374045801527e-05,
3193
+ "loss": 1.5951,
3194
  "step": 531
3195
  },
3196
  {
3197
  "epoch": 2.03,
3198
  "learning_rate": 1.6157760814249364e-05,
3199
+ "loss": 1.5708,
3200
  "step": 532
3201
  },
3202
  {
3203
  "epoch": 2.03,
3204
  "learning_rate": 1.60941475826972e-05,
3205
+ "loss": 1.453,
3206
  "step": 533
3207
  },
3208
  {
3209
  "epoch": 2.04,
3210
  "learning_rate": 1.6030534351145038e-05,
3211
+ "loss": 1.5377,
3212
  "step": 534
3213
  },
3214
  {
3215
  "epoch": 2.04,
3216
  "learning_rate": 1.5966921119592875e-05,
3217
+ "loss": 1.5596,
3218
  "step": 535
3219
  },
3220
  {
3221
  "epoch": 2.05,
3222
  "learning_rate": 1.5903307888040712e-05,
3223
+ "loss": 1.4487,
3224
  "step": 536
3225
  },
3226
  {
3227
  "epoch": 2.05,
3228
  "learning_rate": 1.583969465648855e-05,
3229
+ "loss": 1.4551,
3230
  "step": 537
3231
  },
3232
  {
3233
  "epoch": 2.05,
3234
  "learning_rate": 1.5776081424936386e-05,
3235
+ "loss": 1.4983,
3236
  "step": 538
3237
  },
3238
  {
3239
  "epoch": 2.06,
3240
  "learning_rate": 1.5712468193384224e-05,
3241
+ "loss": 1.5847,
3242
  "step": 539
3243
  },
3244
  {
3245
  "epoch": 2.06,
3246
  "learning_rate": 1.5648854961832064e-05,
3247
+ "loss": 1.4896,
3248
  "step": 540
3249
  },
3250
  {
3251
  "epoch": 2.06,
3252
  "learning_rate": 1.5585241730279898e-05,
3253
+ "loss": 1.528,
3254
  "step": 541
3255
  },
3256
  {
3257
  "epoch": 2.07,
3258
  "learning_rate": 1.5521628498727735e-05,
3259
+ "loss": 1.5551,
3260
  "step": 542
3261
  },
3262
  {
3263
  "epoch": 2.07,
3264
  "learning_rate": 1.5458015267175575e-05,
3265
+ "loss": 1.4976,
3266
  "step": 543
3267
  },
3268
  {
3269
  "epoch": 2.08,
3270
  "learning_rate": 1.539440203562341e-05,
3271
+ "loss": 1.4458,
3272
  "step": 544
3273
  },
3274
  {
3275
  "epoch": 2.08,
3276
  "learning_rate": 1.5330788804071246e-05,
3277
+ "loss": 1.4596,
3278
  "step": 545
3279
  },
3280
  {
3281
  "epoch": 2.08,
3282
  "learning_rate": 1.5267175572519086e-05,
3283
+ "loss": 1.5857,
3284
  "step": 546
3285
  },
3286
  {
3287
  "epoch": 2.09,
3288
  "learning_rate": 1.5203562340966923e-05,
3289
+ "loss": 1.6295,
3290
  "step": 547
3291
  },
3292
  {
3293
  "epoch": 2.09,
3294
  "learning_rate": 1.5139949109414759e-05,
3295
+ "loss": 1.4606,
3296
  "step": 548
3297
  },
3298
  {
3299
  "epoch": 2.1,
3300
  "learning_rate": 1.5076335877862596e-05,
3301
+ "loss": 1.5193,
3302
  "step": 549
3303
  },
3304
  {
3305
  "epoch": 2.1,
3306
  "learning_rate": 1.5012722646310435e-05,
3307
+ "loss": 1.5228,
3308
  "step": 550
3309
  },
3310
  {
3311
  "epoch": 2.1,
3312
  "learning_rate": 1.494910941475827e-05,
3313
+ "loss": 1.51,
3314
  "step": 551
3315
  },
3316
  {
3317
  "epoch": 2.11,
3318
  "learning_rate": 1.4885496183206107e-05,
3319
+ "loss": 1.6053,
3320
  "step": 552
3321
  },
3322
  {
3323
  "epoch": 2.11,
3324
  "learning_rate": 1.4821882951653946e-05,
3325
+ "loss": 1.5259,
3326
  "step": 553
3327
  },
3328
  {
3329
  "epoch": 2.11,
3330
  "learning_rate": 1.4758269720101781e-05,
3331
+ "loss": 1.5255,
3332
  "step": 554
3333
  },
3334
  {
3335
  "epoch": 2.12,
3336
  "learning_rate": 1.4694656488549618e-05,
3337
+ "loss": 1.5712,
3338
  "step": 555
3339
  },
3340
  {
3341
  "epoch": 2.12,
3342
  "learning_rate": 1.4631043256997457e-05,
3343
+ "loss": 1.5506,
3344
  "step": 556
3345
  },
3346
  {
3347
  "epoch": 2.13,
3348
  "learning_rate": 1.4567430025445294e-05,
3349
+ "loss": 1.6035,
3350
  "step": 557
3351
  },
3352
  {
3353
  "epoch": 2.13,
3354
  "learning_rate": 1.450381679389313e-05,
3355
+ "loss": 1.5887,
3356
  "step": 558
3357
  },
3358
  {
3359
  "epoch": 2.13,
3360
  "learning_rate": 1.4440203562340968e-05,
3361
+ "loss": 1.6213,
3362
  "step": 559
3363
  },
3364
  {
3365
  "epoch": 2.14,
3366
  "learning_rate": 1.4376590330788805e-05,
3367
+ "loss": 1.7528,
3368
  "step": 560
3369
  },
3370
  {
3371
  "epoch": 2.14,
3372
  "learning_rate": 1.431297709923664e-05,
3373
+ "loss": 1.5456,
3374
  "step": 561
3375
  },
3376
  {
3377
  "epoch": 2.15,
3378
  "learning_rate": 1.424936386768448e-05,
3379
+ "loss": 1.5326,
3380
  "step": 562
3381
  },
3382
  {
3383
  "epoch": 2.15,
3384
  "learning_rate": 1.4185750636132317e-05,
3385
+ "loss": 1.6657,
3386
  "step": 563
3387
  },
3388
  {
3389
  "epoch": 2.15,
3390
  "learning_rate": 1.4122137404580155e-05,
3391
+ "loss": 1.5182,
3392
  "step": 564
3393
  },
3394
  {
3395
  "epoch": 2.16,
3396
  "learning_rate": 1.4058524173027989e-05,
3397
+ "loss": 1.5033,
3398
  "step": 565
3399
  },
3400
  {
3401
  "epoch": 2.16,
3402
  "learning_rate": 1.3994910941475828e-05,
3403
+ "loss": 1.5315,
3404
  "step": 566
3405
  },
3406
  {
3407
  "epoch": 2.16,
3408
  "learning_rate": 1.3931297709923667e-05,
3409
+ "loss": 1.4786,
3410
  "step": 567
3411
  },
3412
  {
3413
  "epoch": 2.17,
3414
  "learning_rate": 1.38676844783715e-05,
3415
+ "loss": 1.5207,
3416
  "step": 568
3417
  },
3418
  {
3419
  "epoch": 2.17,
3420
  "learning_rate": 1.3804071246819339e-05,
3421
+ "loss": 1.5073,
3422
  "step": 569
3423
  },
3424
  {
3425
  "epoch": 2.18,
3426
  "learning_rate": 1.3740458015267178e-05,
3427
+ "loss": 1.5242,
3428
  "step": 570
3429
  },
3430
  {
3431
  "epoch": 2.18,
3432
  "learning_rate": 1.3676844783715011e-05,
3433
+ "loss": 1.5465,
3434
  "step": 571
3435
  },
3436
  {
3437
  "epoch": 2.18,
3438
  "learning_rate": 1.361323155216285e-05,
3439
+ "loss": 1.5543,
3440
  "step": 572
3441
  },
3442
  {
3443
  "epoch": 2.19,
3444
  "learning_rate": 1.3549618320610687e-05,
3445
+ "loss": 1.4518,
3446
  "step": 573
3447
  },
3448
  {
3449
  "epoch": 2.19,
3450
  "learning_rate": 1.3486005089058526e-05,
3451
+ "loss": 1.4947,
3452
  "step": 574
3453
  },
3454
  {
3455
  "epoch": 2.19,
3456
  "learning_rate": 1.3422391857506361e-05,
3457
+ "loss": 1.4802,
3458
  "step": 575
3459
  },
3460
  {
3461
  "epoch": 2.2,
3462
  "learning_rate": 1.3358778625954198e-05,
3463
+ "loss": 1.5998,
3464
  "step": 576
3465
  },
3466
  {
3467
  "epoch": 2.2,
3468
  "learning_rate": 1.3295165394402037e-05,
3469
+ "loss": 1.549,
3470
  "step": 577
3471
  },
3472
  {
3473
  "epoch": 2.21,
3474
  "learning_rate": 1.3231552162849873e-05,
3475
+ "loss": 1.5899,
3476
  "step": 578
3477
  },
3478
  {
3479
  "epoch": 2.21,
3480
  "learning_rate": 1.316793893129771e-05,
3481
+ "loss": 1.6233,
3482
  "step": 579
3483
  },
3484
  {
3485
  "epoch": 2.21,
3486
  "learning_rate": 1.3104325699745548e-05,
3487
+ "loss": 1.5587,
3488
  "step": 580
3489
  },
3490
  {
3491
  "epoch": 2.22,
3492
  "learning_rate": 1.3040712468193386e-05,
3493
+ "loss": 1.4515,
3494
  "step": 581
3495
  },
3496
  {
3497
  "epoch": 2.22,
3498
  "learning_rate": 1.2977099236641221e-05,
3499
+ "loss": 1.5081,
3500
  "step": 582
3501
  },
3502
  {
3503
  "epoch": 2.23,
3504
  "learning_rate": 1.291348600508906e-05,
3505
+ "loss": 1.4917,
3506
  "step": 583
3507
  },
3508
  {
3509
  "epoch": 2.23,
3510
  "learning_rate": 1.2849872773536897e-05,
3511
+ "loss": 1.6199,
3512
  "step": 584
3513
  },
3514
  {
3515
  "epoch": 2.23,
3516
  "learning_rate": 1.2786259541984732e-05,
3517
+ "loss": 1.4865,
3518
  "step": 585
3519
  },
3520
  {
3521
  "epoch": 2.24,
3522
  "learning_rate": 1.2722646310432571e-05,
3523
+ "loss": 1.589,
3524
  "step": 586
3525
  },
3526
  {
3527
  "epoch": 2.24,
3528
  "learning_rate": 1.2659033078880408e-05,
3529
+ "loss": 1.6169,
3530
  "step": 587
3531
  },
3532
  {
3533
  "epoch": 2.24,
3534
  "learning_rate": 1.2595419847328243e-05,
3535
+ "loss": 1.5173,
3536
  "step": 588
3537
  },
3538
  {
3539
  "epoch": 2.25,
3540
  "learning_rate": 1.2531806615776082e-05,
3541
+ "loss": 1.5147,
3542
  "step": 589
3543
  },
3544
  {
3545
  "epoch": 2.25,
3546
  "learning_rate": 1.2468193384223919e-05,
3547
+ "loss": 1.5146,
3548
  "step": 590
3549
  },
3550
  {
3551
  "epoch": 2.26,
3552
  "learning_rate": 1.2404580152671756e-05,
3553
+ "loss": 1.5204,
3554
  "step": 591
3555
  },
3556
  {
3557
  "epoch": 2.26,
3558
  "learning_rate": 1.2340966921119595e-05,
3559
+ "loss": 1.5539,
3560
  "step": 592
3561
  },
3562
  {
3563
  "epoch": 2.26,
3564
  "learning_rate": 1.227735368956743e-05,
3565
+ "loss": 1.4935,
3566
  "step": 593
3567
  },
3568
  {
3569
  "epoch": 2.27,
3570
  "learning_rate": 1.2213740458015267e-05,
3571
+ "loss": 1.5304,
3572
  "step": 594
3573
  },
3574
  {
3575
  "epoch": 2.27,
3576
  "learning_rate": 1.2150127226463104e-05,
3577
+ "loss": 1.4887,
3578
  "step": 595
3579
  },
3580
  {
3581
  "epoch": 2.27,
3582
  "learning_rate": 1.2086513994910942e-05,
3583
+ "loss": 1.5163,
3584
  "step": 596
3585
  },
3586
  {
3587
  "epoch": 2.28,
3588
  "learning_rate": 1.202290076335878e-05,
3589
+ "loss": 1.5368,
3590
  "step": 597
3591
  },
3592
  {
3593
  "epoch": 2.28,
3594
  "learning_rate": 1.1959287531806616e-05,
3595
+ "loss": 1.6242,
3596
  "step": 598
3597
  },
3598
  {
3599
  "epoch": 2.29,
3600
  "learning_rate": 1.1895674300254453e-05,
3601
+ "loss": 1.46,
3602
  "step": 599
3603
  },
3604
  {
3605
  "epoch": 2.29,
3606
  "learning_rate": 1.1832061068702292e-05,
3607
+ "loss": 1.4866,
3608
  "step": 600
3609
  },
3610
  {
3611
  "epoch": 2.29,
3612
  "learning_rate": 1.1768447837150127e-05,
3613
+ "loss": 1.6052,
3614
  "step": 601
3615
  },
3616
  {
3617
  "epoch": 2.3,
3618
  "learning_rate": 1.1704834605597966e-05,
3619
+ "loss": 1.5368,
3620
  "step": 602
3621
  },
3622
  {
3623
  "epoch": 2.3,
3624
  "learning_rate": 1.1641221374045803e-05,
3625
+ "loss": 1.5202,
3626
  "step": 603
3627
  },
3628
  {
3629
  "epoch": 2.31,
3630
  "learning_rate": 1.1577608142493638e-05,
3631
+ "loss": 1.4896,
3632
  "step": 604
3633
  },
3634
  {
3635
  "epoch": 2.31,
3636
  "learning_rate": 1.1513994910941477e-05,
3637
+ "loss": 1.542,
3638
  "step": 605
3639
  },
3640
  {
3641
  "epoch": 2.31,
3642
  "learning_rate": 1.1450381679389314e-05,
3643
+ "loss": 1.5377,
3644
  "step": 606
3645
  },
3646
  {
3647
  "epoch": 2.32,
3648
  "learning_rate": 1.1386768447837151e-05,
3649
+ "loss": 1.4361,
3650
  "step": 607
3651
  },
3652
  {
3653
  "epoch": 2.32,
3654
  "learning_rate": 1.1323155216284988e-05,
3655
+ "loss": 1.5759,
3656
  "step": 608
3657
  },
3658
  {
3659
  "epoch": 2.32,
3660
  "learning_rate": 1.1259541984732823e-05,
3661
+ "loss": 1.488,
3662
  "step": 609
3663
  },
3664
  {
3665
  "epoch": 2.33,
3666
  "learning_rate": 1.1195928753180662e-05,
3667
+ "loss": 1.604,
3668
  "step": 610
3669
  },
3670
  {
3671
  "epoch": 2.33,
3672
  "learning_rate": 1.11323155216285e-05,
3673
+ "loss": 1.4868,
3674
  "step": 611
3675
  },
3676
  {
3677
  "epoch": 2.34,
3678
  "learning_rate": 1.1068702290076336e-05,
3679
+ "loss": 1.6055,
3680
  "step": 612
3681
  },
3682
  {
3683
  "epoch": 2.34,
3684
  "learning_rate": 1.1005089058524173e-05,
3685
+ "loss": 1.7024,
3686
  "step": 613
3687
  },
3688
  {
3689
  "epoch": 2.34,
3690
  "learning_rate": 1.0941475826972012e-05,
3691
+ "loss": 1.6354,
3692
  "step": 614
3693
  },
3694
  {
3695
  "epoch": 2.35,
3696
  "learning_rate": 1.0877862595419848e-05,
3697
+ "loss": 1.4439,
3698
  "step": 615
3699
  },
3700
  {
3701
  "epoch": 2.35,
3702
  "learning_rate": 1.0814249363867685e-05,
3703
+ "loss": 1.5649,
3704
  "step": 616
3705
  },
3706
  {
3707
  "epoch": 2.35,
3708
  "learning_rate": 1.0750636132315522e-05,
3709
+ "loss": 1.4975,
3710
  "step": 617
3711
  },
3712
  {
3713
  "epoch": 2.36,
3714
  "learning_rate": 1.0687022900763359e-05,
3715
+ "loss": 1.5618,
3716
  "step": 618
3717
  },
3718
  {
3719
  "epoch": 2.36,
3720
  "learning_rate": 1.0623409669211198e-05,
3721
+ "loss": 1.5837,
3722
  "step": 619
3723
  },
3724
  {
3725
  "epoch": 2.37,
3726
  "learning_rate": 1.0559796437659033e-05,
3727
+ "loss": 1.6462,
3728
  "step": 620
3729
  },
3730
  {
3731
  "epoch": 2.37,
3732
  "learning_rate": 1.049618320610687e-05,
3733
+ "loss": 1.576,
3734
  "step": 621
3735
  },
3736
  {
3737
  "epoch": 2.37,
3738
  "learning_rate": 1.0432569974554709e-05,
3739
+ "loss": 1.5261,
3740
  "step": 622
3741
  },
3742
  {
3743
  "epoch": 2.38,
3744
  "learning_rate": 1.0368956743002544e-05,
3745
+ "loss": 1.5401,
3746
  "step": 623
3747
  },
3748
  {
3749
  "epoch": 2.38,
3750
  "learning_rate": 1.0305343511450383e-05,
3751
+ "loss": 1.5797,
3752
  "step": 624
3753
  },
3754
  {
3755
  "epoch": 2.39,
3756
  "learning_rate": 1.024173027989822e-05,
3757
+ "loss": 1.5965,
3758
  "step": 625
3759
  },
3760
  {
3761
  "epoch": 2.39,
3762
  "learning_rate": 1.0178117048346055e-05,
3763
+ "loss": 1.515,
3764
  "step": 626
3765
  },
3766
  {
3767
  "epoch": 2.39,
3768
  "learning_rate": 1.0114503816793894e-05,
3769
+ "loss": 1.54,
3770
  "step": 627
3771
  },
3772
  {
3773
  "epoch": 2.4,
3774
  "learning_rate": 1.0050890585241731e-05,
3775
+ "loss": 1.5752,
3776
  "step": 628
3777
  },
3778
  {
3779
  "epoch": 2.4,
3780
  "learning_rate": 9.987277353689568e-06,
3781
+ "loss": 1.5602,
3782
  "step": 629
3783
  },
3784
  {
3785
  "epoch": 2.4,
3786
  "learning_rate": 9.923664122137405e-06,
3787
+ "loss": 1.5375,
3788
  "step": 630
3789
  },
3790
  {
3791
  "epoch": 2.41,
3792
  "learning_rate": 9.860050890585242e-06,
3793
+ "loss": 1.63,
3794
  "step": 631
3795
  },
3796
  {
3797
  "epoch": 2.41,
3798
  "learning_rate": 9.79643765903308e-06,
3799
+ "loss": 1.6018,
3800
  "step": 632
3801
  },
3802
  {
3803
  "epoch": 2.42,
3804
  "learning_rate": 9.732824427480917e-06,
3805
+ "loss": 1.5341,
3806
  "step": 633
3807
  },
3808
  {
3809
  "epoch": 2.42,
3810
  "learning_rate": 9.669211195928754e-06,
3811
+ "loss": 1.5728,
3812
  "step": 634
3813
  },
3814
  {
3815
  "epoch": 2.42,
3816
  "learning_rate": 9.60559796437659e-06,
3817
+ "loss": 1.4774,
3818
  "step": 635
3819
  },
3820
  {
3821
  "epoch": 2.43,
3822
  "learning_rate": 9.541984732824428e-06,
3823
+ "loss": 1.5961,
3824
  "step": 636
3825
  },
3826
  {
3827
  "epoch": 2.43,
3828
  "learning_rate": 9.478371501272265e-06,
3829
+ "loss": 1.5615,
3830
  "step": 637
3831
  },
3832
  {
3833
  "epoch": 2.44,
3834
  "learning_rate": 9.414758269720102e-06,
3835
+ "loss": 1.5452,
3836
  "step": 638
3837
  },
3838
  {
3839
  "epoch": 2.44,
3840
  "learning_rate": 9.351145038167939e-06,
3841
+ "loss": 1.5388,
3842
  "step": 639
3843
  },
3844
  {
3845
  "epoch": 2.44,
3846
  "learning_rate": 9.287531806615776e-06,
3847
+ "loss": 1.341,
3848
  "step": 640
3849
  },
3850
  {
3851
  "epoch": 2.45,
3852
  "learning_rate": 9.223918575063615e-06,
3853
+ "loss": 1.3478,
3854
  "step": 641
3855
  },
3856
  {
3857
  "epoch": 2.45,
3858
  "learning_rate": 9.16030534351145e-06,
3859
+ "loss": 1.496,
3860
  "step": 642
3861
  },
3862
  {
3863
  "epoch": 2.45,
3864
  "learning_rate": 9.096692111959287e-06,
3865
+ "loss": 1.425,
3866
  "step": 643
3867
  },
3868
  {
3869
  "epoch": 2.46,
3870
  "learning_rate": 9.033078880407126e-06,
3871
+ "loss": 1.6387,
3872
  "step": 644
3873
  },
3874
  {
3875
  "epoch": 2.46,
3876
  "learning_rate": 8.969465648854961e-06,
3877
+ "loss": 1.5034,
3878
  "step": 645
3879
  },
3880
  {
3881
  "epoch": 2.47,
3882
  "learning_rate": 8.9058524173028e-06,
3883
+ "loss": 1.7018,
3884
  "step": 646
3885
  },
3886
  {
3887
  "epoch": 2.47,
3888
  "learning_rate": 8.842239185750637e-06,
3889
+ "loss": 1.384,
3890
  "step": 647
3891
  },
3892
  {
3893
  "epoch": 2.47,
3894
  "learning_rate": 8.778625954198473e-06,
3895
+ "loss": 1.5595,
3896
  "step": 648
3897
  },
3898
  {
3899
  "epoch": 2.48,
3900
  "learning_rate": 8.715012722646311e-06,
3901
+ "loss": 1.5429,
3902
  "step": 649
3903
  },
3904
  {
3905
  "epoch": 2.48,
3906
  "learning_rate": 8.651399491094148e-06,
3907
+ "loss": 1.4169,
3908
  "step": 650
3909
  },
3910
  {
3911
  "epoch": 2.48,
3912
  "learning_rate": 8.587786259541985e-06,
3913
+ "loss": 1.4469,
3914
  "step": 651
3915
  },
3916
  {
3917
  "epoch": 2.49,
3918
  "learning_rate": 8.524173027989823e-06,
3919
+ "loss": 1.5503,
3920
  "step": 652
3921
  },
3922
  {
3923
  "epoch": 2.49,
3924
  "learning_rate": 8.46055979643766e-06,
3925
+ "loss": 1.4019,
3926
  "step": 653
3927
  },
3928
  {
3929
  "epoch": 2.5,
3930
  "learning_rate": 8.396946564885497e-06,
3931
+ "loss": 1.392,
3932
  "step": 654
3933
  },
3934
  {
3935
  "epoch": 2.5,
3936
  "learning_rate": 8.333333333333334e-06,
3937
+ "loss": 1.4966,
3938
  "step": 655
3939
  },
3940
  {
3941
  "epoch": 2.5,
3942
  "learning_rate": 8.26972010178117e-06,
3943
+ "loss": 1.3869,
3944
  "step": 656
3945
  },
3946
  {
3947
  "epoch": 2.51,
3948
  "learning_rate": 8.206106870229008e-06,
3949
+ "loss": 1.5947,
3950
  "step": 657
3951
  },
3952
  {
3953
  "epoch": 2.51,
3954
  "learning_rate": 8.142493638676845e-06,
3955
+ "loss": 1.6324,
3956
  "step": 658
3957
  },
3958
  {
3959
  "epoch": 2.52,
3960
  "learning_rate": 8.078880407124682e-06,
3961
+ "loss": 1.5893,
3962
  "step": 659
3963
  },
3964
  {
3965
  "epoch": 2.52,
3966
  "learning_rate": 8.015267175572519e-06,
3967
+ "loss": 1.4458,
3968
  "step": 660
3969
  },
3970
  {
3971
  "epoch": 2.52,
3972
  "learning_rate": 7.951653944020356e-06,
3973
+ "loss": 1.4867,
3974
  "step": 661
3975
  },
3976
  {
3977
  "epoch": 2.53,
3978
  "learning_rate": 7.888040712468193e-06,
3979
+ "loss": 1.6153,
3980
  "step": 662
3981
  },
3982
  {
3983
  "epoch": 2.53,
3984
  "learning_rate": 7.824427480916032e-06,
3985
+ "loss": 1.444,
3986
  "step": 663
3987
  },
3988
  {
3989
  "epoch": 2.53,
3990
  "learning_rate": 7.760814249363867e-06,
3991
+ "loss": 1.558,
3992
  "step": 664
3993
  },
3994
  {
3995
  "epoch": 2.54,
3996
  "learning_rate": 7.697201017811704e-06,
3997
+ "loss": 1.5311,
3998
  "step": 665
3999
  },
4000
  {
4001
  "epoch": 2.54,
4002
  "learning_rate": 7.633587786259543e-06,
4003
+ "loss": 1.5768,
4004
  "step": 666
4005
  },
4006
  {
4007
  "epoch": 2.55,
4008
  "learning_rate": 7.569974554707379e-06,
4009
+ "loss": 1.4543,
4010
  "step": 667
4011
  },
4012
  {
4013
  "epoch": 2.55,
4014
  "learning_rate": 7.506361323155217e-06,
4015
+ "loss": 1.4829,
4016
  "step": 668
4017
  },
4018
  {
4019
  "epoch": 2.55,
4020
  "learning_rate": 7.4427480916030536e-06,
4021
+ "loss": 1.4837,
4022
  "step": 669
4023
  },
4024
  {
4025
  "epoch": 2.56,
4026
  "learning_rate": 7.379134860050891e-06,
4027
+ "loss": 1.4309,
4028
  "step": 670
4029
  },
4030
  {
4031
  "epoch": 2.56,
4032
  "learning_rate": 7.3155216284987285e-06,
4033
+ "loss": 1.579,
4034
  "step": 671
4035
  },
4036
  {
4037
  "epoch": 2.56,
4038
  "learning_rate": 7.251908396946565e-06,
4039
+ "loss": 1.3897,
4040
  "step": 672
4041
  },
4042
  {
4043
  "epoch": 2.57,
4044
  "learning_rate": 7.188295165394403e-06,
4045
+ "loss": 1.4868,
4046
  "step": 673
4047
  },
4048
  {
4049
  "epoch": 2.57,
4050
  "learning_rate": 7.12468193384224e-06,
4051
+ "loss": 1.5413,
4052
  "step": 674
4053
  },
4054
  {
4055
  "epoch": 2.58,
4056
  "learning_rate": 7.061068702290078e-06,
4057
+ "loss": 1.5012,
4058
  "step": 675
4059
  },
4060
  {
4061
  "epoch": 2.58,
4062
  "learning_rate": 6.997455470737914e-06,
4063
+ "loss": 1.4479,
4064
  "step": 676
4065
  },
4066
  {
4067
  "epoch": 2.58,
4068
  "learning_rate": 6.93384223918575e-06,
4069
+ "loss": 1.6027,
4070
  "step": 677
4071
  },
4072
  {
4073
  "epoch": 2.59,
4074
  "learning_rate": 6.870229007633589e-06,
4075
+ "loss": 1.5398,
4076
  "step": 678
4077
  },
4078
  {
4079
  "epoch": 2.59,
4080
  "learning_rate": 6.806615776081425e-06,
4081
+ "loss": 1.4343,
4082
  "step": 679
4083
  },
4084
  {
4085
  "epoch": 2.6,
4086
  "learning_rate": 6.743002544529263e-06,
4087
+ "loss": 1.4993,
4088
  "step": 680
4089
  },
4090
  {
4091
  "epoch": 2.6,
4092
  "learning_rate": 6.679389312977099e-06,
4093
+ "loss": 1.5968,
4094
  "step": 681
4095
  },
4096
  {
4097
  "epoch": 2.6,
4098
  "learning_rate": 6.615776081424936e-06,
4099
+ "loss": 1.573,
4100
  "step": 682
4101
  },
4102
  {
4103
  "epoch": 2.61,
4104
  "learning_rate": 6.552162849872774e-06,
4105
+ "loss": 1.5111,
4106
  "step": 683
4107
  },
4108
  {
4109
  "epoch": 2.61,
4110
  "learning_rate": 6.4885496183206104e-06,
4111
+ "loss": 1.5173,
4112
  "step": 684
4113
  },
4114
  {
4115
  "epoch": 2.61,
4116
  "learning_rate": 6.424936386768448e-06,
4117
+ "loss": 1.4858,
4118
  "step": 685
4119
  },
4120
  {
4121
  "epoch": 2.62,
4122
  "learning_rate": 6.3613231552162854e-06,
4123
+ "loss": 1.4872,
4124
  "step": 686
4125
  },
4126
  {
4127
  "epoch": 2.62,
4128
  "learning_rate": 6.297709923664122e-06,
4129
+ "loss": 1.6585,
4130
  "step": 687
4131
  },
4132
  {
4133
  "epoch": 2.63,
4134
  "learning_rate": 6.2340966921119596e-06,
4135
+ "loss": 1.5683,
4136
  "step": 688
4137
  },
4138
  {
4139
  "epoch": 2.63,
4140
  "learning_rate": 6.1704834605597975e-06,
4141
+ "loss": 1.552,
4142
  "step": 689
4143
  },
4144
  {
4145
  "epoch": 2.63,
4146
  "learning_rate": 6.106870229007634e-06,
4147
+ "loss": 1.5064,
4148
  "step": 690
4149
  },
4150
  {
4151
  "epoch": 2.64,
4152
  "learning_rate": 6.043256997455471e-06,
4153
+ "loss": 1.6124,
4154
  "step": 691
4155
  },
4156
  {
4157
  "epoch": 2.64,
4158
  "learning_rate": 5.979643765903308e-06,
4159
+ "loss": 1.5318,
4160
  "step": 692
4161
  },
4162
  {
4163
  "epoch": 2.65,
4164
  "learning_rate": 5.916030534351146e-06,
4165
+ "loss": 1.4545,
4166
  "step": 693
4167
  },
4168
  {
4169
  "epoch": 2.65,
4170
  "learning_rate": 5.852417302798983e-06,
4171
+ "loss": 1.5969,
4172
  "step": 694
4173
  },
4174
  {
4175
  "epoch": 2.65,
4176
  "learning_rate": 5.788804071246819e-06,
4177
+ "loss": 1.4931,
4178
  "step": 695
4179
  },
4180
  {
4181
  "epoch": 2.66,
4182
  "learning_rate": 5.725190839694657e-06,
4183
+ "loss": 1.6001,
4184
  "step": 696
4185
  },
4186
  {
4187
  "epoch": 2.66,
4188
  "learning_rate": 5.661577608142494e-06,
4189
+ "loss": 1.5452,
4190
  "step": 697
4191
  },
4192
  {
4193
  "epoch": 2.66,
4194
  "learning_rate": 5.597964376590331e-06,
4195
+ "loss": 1.4818,
4196
  "step": 698
4197
  },
4198
  {
4199
  "epoch": 2.67,
4200
  "learning_rate": 5.534351145038168e-06,
4201
+ "loss": 1.5295,
4202
  "step": 699
4203
  },
4204
  {
4205
  "epoch": 2.67,
4206
  "learning_rate": 5.470737913486006e-06,
4207
+ "loss": 1.4386,
4208
  "step": 700
4209
  },
4210
  {
4211
  "epoch": 2.68,
4212
  "learning_rate": 5.407124681933842e-06,
4213
+ "loss": 1.6439,
4214
  "step": 701
4215
  },
4216
  {
4217
  "epoch": 2.68,
4218
  "learning_rate": 5.343511450381679e-06,
4219
+ "loss": 1.5542,
4220
  "step": 702
4221
  },
4222
  {
4223
  "epoch": 2.68,
4224
  "learning_rate": 5.2798982188295165e-06,
4225
+ "loss": 1.5447,
4226
  "step": 703
4227
  },
4228
  {
4229
  "epoch": 2.69,
4230
  "learning_rate": 5.216284987277354e-06,
4231
+ "loss": 1.5099,
4232
  "step": 704
4233
  },
4234
  {
4235
  "epoch": 2.69,
4236
  "learning_rate": 5.1526717557251914e-06,
4237
+ "loss": 1.5497,
4238
  "step": 705
4239
  },
4240
  {
4241
  "epoch": 2.69,
4242
  "learning_rate": 5.089058524173028e-06,
4243
+ "loss": 1.5273,
4244
  "step": 706
4245
  },
4246
  {
4247
  "epoch": 2.7,
4248
  "learning_rate": 5.025445292620866e-06,
4249
+ "loss": 1.5756,
4250
  "step": 707
4251
  },
4252
  {
4253
  "epoch": 2.7,
4254
  "learning_rate": 4.961832061068703e-06,
4255
+ "loss": 1.4347,
4256
  "step": 708
4257
  },
4258
  {
4259
  "epoch": 2.71,
4260
  "learning_rate": 4.89821882951654e-06,
4261
+ "loss": 1.4573,
4262
  "step": 709
4263
  },
4264
  {
4265
  "epoch": 2.71,
4266
  "learning_rate": 4.834605597964377e-06,
4267
+ "loss": 1.531,
4268
  "step": 710
4269
  },
4270
  {
4271
  "epoch": 2.71,
4272
  "learning_rate": 4.770992366412214e-06,
4273
+ "loss": 1.5594,
4274
  "step": 711
4275
  },
4276
  {
4277
  "epoch": 2.72,
4278
  "learning_rate": 4.707379134860051e-06,
4279
+ "loss": 1.4137,
4280
  "step": 712
4281
  },
4282
  {
4283
  "epoch": 2.72,
4284
  "learning_rate": 4.643765903307888e-06,
4285
+ "loss": 1.4758,
4286
  "step": 713
4287
  },
4288
  {
4289
  "epoch": 2.73,
4290
  "learning_rate": 4.580152671755725e-06,
4291
+ "loss": 1.5364,
4292
  "step": 714
4293
  },
4294
  {
4295
  "epoch": 2.73,
4296
  "learning_rate": 4.516539440203563e-06,
4297
+ "loss": 1.5362,
4298
  "step": 715
4299
  },
4300
  {
4301
  "epoch": 2.73,
4302
  "learning_rate": 4.4529262086514e-06,
4303
+ "loss": 1.5036,
4304
  "step": 716
4305
  },
4306
  {
4307
  "epoch": 2.74,
4308
  "learning_rate": 4.389312977099236e-06,
4309
+ "loss": 1.4762,
4310
  "step": 717
4311
  },
4312
  {
4313
  "epoch": 2.74,
4314
  "learning_rate": 4.325699745547074e-06,
4315
+ "loss": 1.6156,
4316
  "step": 718
4317
  },
4318
  {
4319
  "epoch": 2.74,
4320
  "learning_rate": 4.262086513994911e-06,
4321
+ "loss": 1.6199,
4322
  "step": 719
4323
  },
4324
  {
4325
  "epoch": 2.75,
4326
  "learning_rate": 4.198473282442748e-06,
4327
+ "loss": 1.4238,
4328
  "step": 720
4329
  },
4330
  {
4331
  "epoch": 2.75,
4332
  "learning_rate": 4.134860050890585e-06,
4333
+ "loss": 1.4756,
4334
  "step": 721
4335
  },
4336
  {
4337
  "epoch": 2.76,
4338
  "learning_rate": 4.0712468193384225e-06,
4339
+ "loss": 1.5169,
4340
  "step": 722
4341
  },
4342
  {
4343
  "epoch": 2.76,
4344
  "learning_rate": 4.0076335877862595e-06,
4345
+ "loss": 1.5376,
4346
  "step": 723
4347
  },
4348
  {
4349
  "epoch": 2.76,
4350
  "learning_rate": 3.944020356234097e-06,
4351
+ "loss": 1.5023,
4352
  "step": 724
4353
  },
4354
  {
4355
  "epoch": 2.77,
4356
  "learning_rate": 3.880407124681934e-06,
4357
+ "loss": 1.5147,
4358
  "step": 725
4359
  },
4360
  {
4361
  "epoch": 2.77,
4362
  "learning_rate": 3.816793893129772e-06,
4363
+ "loss": 1.5219,
4364
  "step": 726
4365
  },
4366
  {
4367
  "epoch": 2.77,
4368
  "learning_rate": 3.7531806615776087e-06,
4369
+ "loss": 1.4531,
4370
  "step": 727
4371
  },
4372
  {
4373
  "epoch": 2.78,
4374
  "learning_rate": 3.6895674300254453e-06,
4375
+ "loss": 1.6463,
4376
  "step": 728
4377
  },
4378
  {
4379
  "epoch": 2.78,
4380
  "learning_rate": 3.6259541984732824e-06,
4381
+ "loss": 1.5067,
4382
  "step": 729
4383
  },
4384
  {
4385
  "epoch": 2.79,
4386
  "learning_rate": 3.56234096692112e-06,
4387
+ "loss": 1.5814,
4388
  "step": 730
4389
  },
4390
  {
4391
  "epoch": 2.79,
4392
  "learning_rate": 3.498727735368957e-06,
4393
+ "loss": 1.5162,
4394
  "step": 731
4395
  },
4396
  {
4397
  "epoch": 2.79,
4398
  "learning_rate": 3.4351145038167944e-06,
4399
+ "loss": 1.5473,
4400
  "step": 732
4401
  },
4402
  {
4403
  "epoch": 2.8,
4404
  "learning_rate": 3.3715012722646315e-06,
4405
+ "loss": 1.5247,
4406
  "step": 733
4407
  },
4408
  {
4409
  "epoch": 2.8,
4410
  "learning_rate": 3.307888040712468e-06,
4411
+ "loss": 1.6345,
4412
  "step": 734
4413
  },
4414
  {
4415
  "epoch": 2.81,
4416
  "learning_rate": 3.2442748091603052e-06,
4417
+ "loss": 1.5223,
4418
  "step": 735
4419
  },
4420
  {
4421
  "epoch": 2.81,
4422
  "learning_rate": 3.1806615776081427e-06,
4423
+ "loss": 1.5291,
4424
  "step": 736
4425
  },
4426
  {
4427
  "epoch": 2.81,
4428
  "learning_rate": 3.1170483460559798e-06,
4429
+ "loss": 1.446,
4430
  "step": 737
4431
  },
4432
  {
4433
  "epoch": 2.82,
4434
  "learning_rate": 3.053435114503817e-06,
4435
+ "loss": 1.4595,
4436
  "step": 738
4437
  },
4438
  {
4439
  "epoch": 2.82,
4440
  "learning_rate": 2.989821882951654e-06,
4441
+ "loss": 1.4902,
4442
  "step": 739
4443
  },
4444
  {
4445
  "epoch": 2.82,
4446
  "learning_rate": 2.9262086513994914e-06,
4447
+ "loss": 1.4615,
4448
  "step": 740
4449
  },
4450
  {
4451
  "epoch": 2.83,
4452
  "learning_rate": 2.8625954198473285e-06,
4453
+ "loss": 1.4374,
4454
  "step": 741
4455
  },
4456
  {
4457
  "epoch": 2.83,
4458
  "learning_rate": 2.7989821882951656e-06,
4459
+ "loss": 1.5031,
4460
  "step": 742
4461
  },
4462
  {
4463
  "epoch": 2.84,
4464
  "learning_rate": 2.735368956743003e-06,
4465
+ "loss": 1.48,
4466
  "step": 743
4467
  },
4468
  {
4469
  "epoch": 2.84,
4470
  "learning_rate": 2.6717557251908397e-06,
4471
+ "loss": 1.4623,
4472
  "step": 744
4473
  },
4474
  {
4475
  "epoch": 2.84,
4476
  "learning_rate": 2.608142493638677e-06,
4477
+ "loss": 1.4165,
4478
  "step": 745
4479
  },
4480
  {
4481
  "epoch": 2.85,
4482
  "learning_rate": 2.544529262086514e-06,
4483
+ "loss": 1.4686,
4484
  "step": 746
4485
  },
4486
  {
4487
  "epoch": 2.85,
4488
  "learning_rate": 2.4809160305343513e-06,
4489
+ "loss": 1.4304,
4490
  "step": 747
4491
  },
4492
  {
4493
  "epoch": 2.85,
4494
  "learning_rate": 2.4173027989821884e-06,
4495
+ "loss": 1.4525,
4496
  "step": 748
4497
  },
4498
  {
4499
  "epoch": 2.86,
4500
  "learning_rate": 2.3536895674300255e-06,
4501
+ "loss": 1.4141,
4502
  "step": 749
4503
  },
4504
  {
4505
  "epoch": 2.86,
4506
  "learning_rate": 2.2900763358778625e-06,
4507
+ "loss": 1.5207,
4508
  "step": 750
4509
  },
4510
  {
4511
  "epoch": 2.87,
4512
  "learning_rate": 2.2264631043257e-06,
4513
+ "loss": 1.5353,
4514
  "step": 751
4515
  },
4516
  {
4517
  "epoch": 2.87,
4518
  "learning_rate": 2.162849872773537e-06,
4519
+ "loss": 1.4781,
4520
  "step": 752
4521
  },
4522
  {
4523
  "epoch": 2.87,
4524
  "learning_rate": 2.099236641221374e-06,
4525
+ "loss": 1.5691,
4526
  "step": 753
4527
  },
4528
  {
4529
  "epoch": 2.88,
4530
  "learning_rate": 2.0356234096692112e-06,
4531
+ "loss": 1.6929,
4532
  "step": 754
4533
  },
4534
  {
4535
  "epoch": 2.88,
4536
  "learning_rate": 1.9720101781170483e-06,
4537
+ "loss": 1.6577,
4538
  "step": 755
4539
  },
4540
  {
4541
  "epoch": 2.89,
4542
  "learning_rate": 1.908396946564886e-06,
4543
+ "loss": 1.4755,
4544
  "step": 756
4545
  },
4546
  {
4547
  "epoch": 2.89,
4548
  "learning_rate": 1.8447837150127227e-06,
4549
+ "loss": 1.6568,
4550
  "step": 757
4551
  },
4552
  {
4553
  "epoch": 2.89,
4554
  "learning_rate": 1.78117048346056e-06,
4555
+ "loss": 1.4796,
4556
  "step": 758
4557
  },
4558
  {
4559
  "epoch": 2.9,
4560
  "learning_rate": 1.7175572519083972e-06,
4561
+ "loss": 1.5003,
4562
  "step": 759
4563
  },
4564
  {
4565
  "epoch": 2.9,
4566
  "learning_rate": 1.653944020356234e-06,
4567
+ "loss": 1.5775,
4568
  "step": 760
4569
  },
4570
  {
4571
  "epoch": 2.9,
4572
  "learning_rate": 1.5903307888040714e-06,
4573
+ "loss": 1.5154,
4574
  "step": 761
4575
  },
4576
  {
4577
  "epoch": 2.91,
4578
  "learning_rate": 1.5267175572519084e-06,
4579
+ "loss": 1.4024,
4580
  "step": 762
4581
  },
4582
  {
4583
  "epoch": 2.91,
4584
  "learning_rate": 1.4631043256997457e-06,
4585
+ "loss": 1.4939,
4586
  "step": 763
4587
  },
4588
  {
4589
  "epoch": 2.92,
4590
  "learning_rate": 1.3994910941475828e-06,
4591
+ "loss": 1.5221,
4592
  "step": 764
4593
  },
4594
  {
4595
  "epoch": 2.92,
4596
  "learning_rate": 1.3358778625954198e-06,
4597
+ "loss": 1.5407,
4598
  "step": 765
4599
  },
4600
  {
4601
  "epoch": 2.92,
4602
  "learning_rate": 1.272264631043257e-06,
4603
+ "loss": 1.5351,
4604
  "step": 766
4605
  },
4606
  {
4607
  "epoch": 2.93,
4608
  "learning_rate": 1.2086513994910942e-06,
4609
+ "loss": 1.4985,
4610
  "step": 767
4611
  },
4612
  {
4613
  "epoch": 2.93,
4614
  "learning_rate": 1.1450381679389313e-06,
4615
+ "loss": 1.5047,
4616
  "step": 768
4617
  },
4618
  {
4619
  "epoch": 2.94,
4620
  "learning_rate": 1.0814249363867685e-06,
4621
+ "loss": 1.5272,
4622
  "step": 769
4623
  },
4624
  {
4625
  "epoch": 2.94,
4626
  "learning_rate": 1.0178117048346056e-06,
4627
+ "loss": 1.4864,
4628
  "step": 770
4629
  },
4630
  {
4631
  "epoch": 2.94,
4632
  "learning_rate": 9.54198473282443e-07,
4633
+ "loss": 1.5018,
4634
  "step": 771
4635
  },
4636
  {
4637
  "epoch": 2.95,
4638
  "learning_rate": 8.9058524173028e-07,
4639
+ "loss": 1.5671,
4640
  "step": 772
4641
  },
4642
  {
4643
  "epoch": 2.95,
4644
  "learning_rate": 8.26972010178117e-07,
4645
+ "loss": 1.5599,
4646
  "step": 773
4647
  },
4648
  {
4649
  "epoch": 2.95,
4650
  "learning_rate": 7.633587786259542e-07,
4651
+ "loss": 1.5463,
4652
  "step": 774
4653
  },
4654
  {
4655
  "epoch": 2.96,
4656
  "learning_rate": 6.997455470737914e-07,
4657
+ "loss": 1.4985,
4658
  "step": 775
4659
  },
4660
  {
4661
  "epoch": 2.96,
4662
  "learning_rate": 6.361323155216285e-07,
4663
+ "loss": 1.4801,
4664
  "step": 776
4665
  },
4666
  {
4667
  "epoch": 2.97,
4668
  "learning_rate": 5.725190839694656e-07,
4669
+ "loss": 1.6203,
4670
  "step": 777
4671
  },
4672
  {
4673
  "epoch": 2.97,
4674
  "learning_rate": 5.089058524173028e-07,
4675
+ "loss": 1.5507,
4676
  "step": 778
4677
  },
4678
  {
4679
  "epoch": 2.97,
4680
  "learning_rate": 4.4529262086514e-07,
4681
+ "loss": 1.5613,
4682
  "step": 779
4683
  },
4684
  {
4685
  "epoch": 2.98,
4686
  "learning_rate": 3.816793893129771e-07,
4687
+ "loss": 1.5681,
4688
  "step": 780
4689
  },
4690
  {
4691
  "epoch": 2.98,
4692
  "learning_rate": 3.1806615776081423e-07,
4693
+ "loss": 1.4898,
4694
  "step": 781
4695
  },
4696
  {
4697
  "epoch": 2.98,
4698
  "learning_rate": 2.544529262086514e-07,
4699
+ "loss": 1.5687,
4700
  "step": 782
4701
  },
4702
  {
4703
  "epoch": 2.99,
4704
  "learning_rate": 1.9083969465648855e-07,
4705
+ "loss": 1.5121,
4706
  "step": 783
4707
  },
4708
  {
4709
  "epoch": 2.99,
4710
  "learning_rate": 1.272264631043257e-07,
4711
+ "loss": 1.5745,
4712
  "step": 784
4713
  },
4714
  {
4715
  "epoch": 3.0,
4716
  "learning_rate": 6.361323155216285e-08,
4717
+ "loss": 1.4844,
4718
  "step": 785
4719
  },
4720
  {
4721
  "epoch": 3.0,
4722
  "learning_rate": 0.0,
4723
+ "loss": 1.3335,
4724
  "step": 786
4725
  },
4726
  {
4727
  "epoch": 3.0,
4728
  "step": 786,
4729
  "total_flos": 1.2699738389348352e+16,
4730
+ "train_loss": 0.5571378069069549,
4731
+ "train_runtime": 308.6292,
4732
+ "train_samples_per_second": 60.957,
4733
+ "train_steps_per_second": 2.547
4734
  }
4735
  ],
4736
  "max_steps": 786,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99215543b93de901650f1a65728c6e7938885f84434e76915befc1334508cade
3
  size 3567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852762d4791077e5ef39a416398ffe5419d39e26caa52ed1b2dde8cb84825133
3
  size 3567