anton-l HF staff commited on
Commit
9dea4a9
1 Parent(s): 3a97e0e

Model save

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.1620459199016551,
4
- "train_runtime": 43631.9674,
5
  "train_samples": 246408,
6
- "train_samples_per_second": 28.237,
7
- "train_steps_per_second": 0.441
8
  }
1
  {
2
+ "epoch": 4.07,
3
+ "train_loss": 5.854813561474184e-06,
4
+ "train_runtime": 574.8329,
5
  "train_samples": 246408,
6
+ "train_samples_per_second": 2143.301,
7
+ "train_steps_per_second": 33.497
8
  }
emissions.csv CHANGED
@@ -1,3 +1,4 @@
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2022-04-07T05:24:20,9d6f504f-f106-4b54-a3c9-d2cc9c100918,codecarbon,43632.017745018005,9.320191315050605,16.458045762052986,USA,USA,Iowa,Y,gcp,us-central1
3
  2022-04-12T17:58:32,2ca859a2-6a59-473a-bf34-566c1ed805fb,codecarbon,574.8996481895447,0.12711368634643946,0.22446351111855814,USA,USA,Iowa,Y,gcp,us-central1
 
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2022-04-07T05:24:20,9d6f504f-f106-4b54-a3c9-d2cc9c100918,codecarbon,43632.017745018005,9.320191315050605,16.458045762052986,USA,USA,Iowa,Y,gcp,us-central1
3
  2022-04-12T17:58:32,2ca859a2-6a59-473a-bf34-566c1ed805fb,codecarbon,574.8996481895447,0.12711368634643946,0.22446351111855814,USA,USA,Iowa,Y,gcp,us-central1
4
+ 2022-04-12T18:27:42,01e54279-0c0b-4c7b-9048-6e3d6739fae8,codecarbon,568.9987049102783,0.13174037435842878,0.23263354115915383,USA,USA,Iowa,Y,gcp,us-central1
runs/Apr12_17-48-07_anton-xtreme-s/events.out.tfevents.1649786354.anton-xtreme-s.19165.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f705f4114104cc28f687298b4b63607ed440f69f41dd5b38ea3a570018a088b
3
+ size 17148
runs/Apr12_18-17-31_anton-xtreme-s/1649787493.4871702/events.out.tfevents.1649787493.anton-xtreme-s.23063.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73dfcda3e498e330cc9234085c6538dc61b38909edcc6934b8fab3f8ae066332
3
+ size 5092
runs/Apr12_18-17-31_anton-xtreme-s/events.out.tfevents.1649787493.anton-xtreme-s.23063.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff3ff6cb011e00f2d5b95a99babd33d3013ad22e6fc9ed8658928033ae83dd1
3
+ size 49840
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.1620459199016551,
4
- "train_runtime": 43631.9674,
5
  "train_samples": 246408,
6
- "train_samples_per_second": 28.237,
7
- "train_steps_per_second": 0.441
8
  }
1
  {
2
+ "epoch": 4.07,
3
+ "train_loss": 5.854813561474184e-06,
4
+ "train_runtime": 574.8329,
5
  "train_samples": 246408,
6
+ "train_samples_per_second": 2143.301,
7
+ "train_steps_per_second": 33.497
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 0.7236466414789606,
3
  "best_model_checkpoint": "xtreme_s_xlsr_300m_fleurs_langid_truncated/checkpoint-19000",
4
- "epoch": 4.999870146734191,
5
- "global_step": 19250,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -114179,1518 +114179,1548 @@
114179
  "step": 19000
114180
  },
114181
  {
114182
- "epoch": 4.94,
114183
- "learning_rate": 4.48695652173913e-06,
114184
  "loss": 0.0001,
114185
  "step": 19001
114186
  },
114187
  {
114188
- "epoch": 4.94,
114189
- "learning_rate": 4.469565217391304e-06,
114190
- "loss": 0.0,
114191
  "step": 19002
114192
  },
114193
  {
114194
- "epoch": 4.94,
114195
- "learning_rate": 4.452173913043477e-06,
114196
  "loss": 0.0001,
114197
  "step": 19003
114198
  },
114199
  {
114200
- "epoch": 4.94,
114201
- "learning_rate": 4.434782608695652e-06,
114202
- "loss": 0.0001,
114203
  "step": 19004
114204
  },
114205
  {
114206
- "epoch": 4.94,
114207
- "learning_rate": 4.417391304347826e-06,
114208
- "loss": 0.0,
114209
  "step": 19005
114210
  },
114211
  {
114212
- "epoch": 4.94,
114213
- "learning_rate": 4.399999999999999e-06,
114214
- "loss": 0.0,
114215
  "step": 19006
114216
  },
114217
  {
114218
- "epoch": 4.94,
114219
- "learning_rate": 4.382608695652174e-06,
114220
- "loss": 0.0002,
114221
  "step": 19007
114222
  },
114223
  {
114224
- "epoch": 4.94,
114225
- "learning_rate": 4.365217391304348e-06,
114226
- "loss": 0.0,
114227
  "step": 19008
114228
  },
114229
  {
114230
- "epoch": 4.94,
114231
- "learning_rate": 4.347826086956521e-06,
114232
- "loss": 0.0001,
114233
  "step": 19009
114234
  },
114235
  {
114236
- "epoch": 4.94,
114237
- "learning_rate": 4.330434782608695e-06,
114238
- "loss": 0.0,
114239
  "step": 19010
114240
  },
114241
  {
114242
- "epoch": 4.94,
114243
- "learning_rate": 4.313043478260869e-06,
114244
- "loss": 0.0,
114245
  "step": 19011
114246
  },
114247
  {
114248
- "epoch": 4.94,
114249
- "learning_rate": 4.295652173913043e-06,
114250
- "loss": 0.0001,
114251
  "step": 19012
114252
  },
114253
  {
114254
- "epoch": 4.94,
114255
- "learning_rate": 4.278260869565217e-06,
114256
- "loss": 0.0001,
114257
  "step": 19013
114258
  },
114259
  {
114260
- "epoch": 4.94,
114261
- "learning_rate": 4.260869565217391e-06,
114262
  "loss": 0.0001,
114263
  "step": 19014
114264
  },
114265
  {
114266
- "epoch": 4.94,
114267
- "learning_rate": 4.2434782608695645e-06,
114268
- "loss": 0.0001,
114269
  "step": 19015
114270
  },
114271
  {
114272
- "epoch": 4.94,
114273
- "learning_rate": 4.226086956521739e-06,
114274
  "loss": 0.0001,
114275
  "step": 19016
114276
  },
114277
  {
114278
- "epoch": 4.94,
114279
- "learning_rate": 4.208695652173913e-06,
114280
- "loss": 0.0001,
114281
  "step": 19017
114282
  },
114283
  {
114284
- "epoch": 4.94,
114285
- "learning_rate": 4.1913043478260865e-06,
114286
- "loss": 0.0001,
114287
  "step": 19018
114288
  },
114289
  {
114290
- "epoch": 4.94,
114291
- "learning_rate": 4.173913043478261e-06,
114292
- "loss": 0.0,
114293
  "step": 19019
114294
  },
114295
  {
114296
- "epoch": 4.94,
114297
- "learning_rate": 4.156521739130434e-06,
114298
- "loss": 0.0,
114299
  "step": 19020
114300
  },
114301
  {
114302
- "epoch": 4.94,
114303
- "learning_rate": 4.1391304347826084e-06,
114304
- "loss": 0.0001,
114305
  "step": 19021
114306
  },
114307
  {
114308
- "epoch": 4.94,
114309
- "learning_rate": 4.121739130434782e-06,
114310
  "loss": 0.0001,
114311
  "step": 19022
114312
  },
114313
  {
114314
- "epoch": 4.94,
114315
- "learning_rate": 4.104347826086956e-06,
114316
  "loss": 0.0001,
114317
  "step": 19023
114318
  },
114319
  {
114320
- "epoch": 4.94,
114321
- "learning_rate": 4.08695652173913e-06,
114322
  "loss": 0.0001,
114323
  "step": 19024
114324
  },
114325
  {
114326
- "epoch": 4.94,
114327
- "learning_rate": 4.069565217391304e-06,
114328
  "loss": 0.0001,
114329
  "step": 19025
114330
  },
114331
  {
114332
- "epoch": 4.94,
114333
- "learning_rate": 4.052173913043478e-06,
114334
- "loss": 0.0,
114335
  "step": 19026
114336
  },
114337
  {
114338
- "epoch": 4.94,
114339
- "learning_rate": 4.0347826086956515e-06,
114340
- "loss": 0.0,
114341
  "step": 19027
114342
  },
114343
  {
114344
- "epoch": 4.94,
114345
- "learning_rate": 4.017391304347826e-06,
114346
  "loss": 0.0001,
114347
  "step": 19028
114348
  },
114349
  {
114350
- "epoch": 4.94,
114351
- "learning_rate": 4e-06,
114352
  "loss": 0.0,
114353
  "step": 19029
114354
  },
114355
  {
114356
- "epoch": 4.94,
114357
- "learning_rate": 3.9826086956521735e-06,
114358
  "loss": 0.0001,
114359
  "step": 19030
114360
  },
114361
  {
114362
- "epoch": 4.94,
114363
- "learning_rate": 3.965217391304347e-06,
114364
- "loss": 0.0,
114365
  "step": 19031
114366
  },
114367
  {
114368
- "epoch": 4.94,
114369
- "learning_rate": 3.947826086956521e-06,
114370
- "loss": 0.0001,
114371
  "step": 19032
114372
  },
114373
  {
114374
- "epoch": 4.94,
114375
- "learning_rate": 3.9304347826086955e-06,
114376
  "loss": 0.0,
114377
  "step": 19033
114378
  },
114379
  {
114380
- "epoch": 4.94,
114381
- "learning_rate": 3.913043478260869e-06,
114382
  "loss": 0.0001,
114383
  "step": 19034
114384
  },
114385
  {
114386
- "epoch": 4.94,
114387
- "learning_rate": 3.895652173913043e-06,
114388
  "loss": 0.0001,
114389
  "step": 19035
114390
  },
114391
  {
114392
- "epoch": 4.94,
114393
- "learning_rate": 3.8782608695652175e-06,
114394
- "loss": 0.0,
114395
  "step": 19036
114396
  },
114397
  {
114398
- "epoch": 4.94,
114399
- "learning_rate": 3.860869565217391e-06,
114400
  "loss": 0.0001,
114401
  "step": 19037
114402
  },
114403
  {
114404
- "epoch": 4.94,
114405
- "learning_rate": 3.843478260869565e-06,
114406
  "loss": 0.0001,
114407
  "step": 19038
114408
  },
114409
  {
114410
- "epoch": 4.95,
114411
- "learning_rate": 3.826086956521739e-06,
114412
- "loss": 0.0017,
114413
  "step": 19039
114414
  },
114415
  {
114416
- "epoch": 4.95,
114417
- "learning_rate": 3.8086956521739128e-06,
114418
- "loss": 0.0001,
114419
  "step": 19040
114420
  },
114421
  {
114422
- "epoch": 4.95,
114423
- "learning_rate": 3.7913043478260865e-06,
114424
- "loss": 0.0002,
114425
  "step": 19041
114426
  },
114427
  {
114428
- "epoch": 4.95,
114429
- "learning_rate": 3.7739130434782606e-06,
114430
  "loss": 0.0001,
114431
  "step": 19042
114432
  },
114433
  {
114434
- "epoch": 4.95,
114435
- "learning_rate": 3.7565217391304347e-06,
114436
  "loss": 0.0001,
114437
  "step": 19043
114438
  },
114439
  {
114440
- "epoch": 4.95,
114441
- "learning_rate": 3.7391304347826085e-06,
114442
- "loss": 0.0001,
114443
  "step": 19044
114444
  },
114445
  {
114446
- "epoch": 4.95,
114447
- "learning_rate": 3.7217391304347826e-06,
114448
  "loss": 0.0001,
114449
  "step": 19045
114450
  },
114451
  {
114452
- "epoch": 4.95,
114453
- "learning_rate": 3.704347826086956e-06,
114454
  "loss": 0.0001,
114455
  "step": 19046
114456
  },
114457
  {
114458
- "epoch": 4.95,
114459
- "learning_rate": 3.68695652173913e-06,
114460
- "loss": 0.0,
114461
  "step": 19047
114462
  },
114463
  {
114464
- "epoch": 4.95,
114465
- "learning_rate": 3.669565217391304e-06,
114466
  "loss": 0.0,
114467
  "step": 19048
114468
  },
114469
  {
114470
- "epoch": 4.95,
114471
- "learning_rate": 3.652173913043478e-06,
114472
  "loss": 0.0,
114473
  "step": 19049
114474
  },
114475
  {
114476
- "epoch": 4.95,
114477
- "learning_rate": 3.634782608695652e-06,
114478
- "loss": 0.0002,
114479
  "step": 19050
114480
  },
114481
  {
114482
- "epoch": 4.95,
114483
- "learning_rate": 3.6173913043478257e-06,
114484
- "loss": 0.0,
114485
  "step": 19051
114486
  },
114487
  {
114488
- "epoch": 4.95,
114489
- "learning_rate": 3.6e-06,
114490
- "loss": 0.0,
114491
  "step": 19052
114492
  },
114493
  {
114494
- "epoch": 4.95,
114495
- "learning_rate": 3.5826086956521735e-06,
114496
- "loss": 0.0,
114497
  "step": 19053
114498
  },
114499
  {
114500
- "epoch": 4.95,
114501
- "learning_rate": 3.5652173913043477e-06,
114502
- "loss": 0.0001,
114503
  "step": 19054
114504
  },
114505
  {
114506
- "epoch": 4.95,
114507
- "learning_rate": 3.547826086956522e-06,
114508
- "loss": 0.0,
114509
  "step": 19055
114510
  },
114511
  {
114512
- "epoch": 4.95,
114513
- "learning_rate": 3.530434782608695e-06,
114514
  "loss": 0.0001,
114515
  "step": 19056
114516
  },
114517
  {
114518
- "epoch": 4.95,
114519
- "learning_rate": 3.5130434782608692e-06,
114520
  "loss": 0.0001,
114521
  "step": 19057
114522
  },
114523
  {
114524
- "epoch": 4.95,
114525
- "learning_rate": 3.495652173913043e-06,
114526
  "loss": 0.0001,
114527
  "step": 19058
114528
  },
114529
  {
114530
- "epoch": 4.95,
114531
- "learning_rate": 3.478260869565217e-06,
114532
- "loss": 0.0001,
114533
  "step": 19059
114534
  },
114535
  {
114536
- "epoch": 4.95,
114537
- "learning_rate": 3.460869565217391e-06,
114538
- "loss": 0.0,
114539
  "step": 19060
114540
  },
114541
  {
114542
- "epoch": 4.95,
114543
- "learning_rate": 3.443478260869565e-06,
114544
- "loss": 0.0002,
114545
  "step": 19061
114546
  },
114547
  {
114548
- "epoch": 4.95,
114549
- "learning_rate": 3.426086956521739e-06,
114550
- "loss": 0.0001,
114551
  "step": 19062
114552
  },
114553
  {
114554
- "epoch": 4.95,
114555
- "learning_rate": 3.4086956521739128e-06,
114556
- "loss": 0.0001,
114557
  "step": 19063
114558
  },
114559
  {
114560
- "epoch": 4.95,
114561
- "learning_rate": 3.391304347826087e-06,
114562
- "loss": 0.0,
114563
  "step": 19064
114564
  },
114565
  {
114566
- "epoch": 4.95,
114567
- "learning_rate": 3.37391304347826e-06,
114568
  "loss": 0.0001,
114569
  "step": 19065
114570
  },
114571
  {
114572
- "epoch": 4.95,
114573
- "learning_rate": 3.3565217391304343e-06,
114574
- "loss": 0.0,
114575
  "step": 19066
114576
  },
114577
  {
114578
- "epoch": 4.95,
114579
- "learning_rate": 3.3391304347826085e-06,
114580
  "loss": 0.0001,
114581
  "step": 19067
114582
  },
114583
  {
114584
- "epoch": 4.95,
114585
- "learning_rate": 3.321739130434782e-06,
114586
- "loss": 0.0001,
114587
  "step": 19068
114588
  },
114589
  {
114590
- "epoch": 4.95,
114591
- "learning_rate": 3.3043478260869563e-06,
114592
  "loss": 0.0001,
114593
  "step": 19069
114594
  },
114595
  {
114596
- "epoch": 4.95,
114597
- "learning_rate": 3.28695652173913e-06,
114598
  "loss": 0.0,
114599
  "step": 19070
114600
  },
114601
  {
114602
- "epoch": 4.95,
114603
- "learning_rate": 3.269565217391304e-06,
114604
- "loss": 0.0,
114605
  "step": 19071
114606
  },
114607
  {
114608
- "epoch": 4.95,
114609
- "learning_rate": 3.252173913043478e-06,
114610
  "loss": 0.0,
114611
  "step": 19072
114612
  },
114613
  {
114614
- "epoch": 4.95,
114615
- "learning_rate": 3.234782608695652e-06,
114616
  "loss": 0.0001,
114617
  "step": 19073
114618
  },
114619
  {
114620
- "epoch": 4.95,
114621
- "learning_rate": 3.217391304347826e-06,
114622
- "loss": 0.0001,
114623
  "step": 19074
114624
  },
114625
  {
114626
- "epoch": 4.95,
114627
- "learning_rate": 3.1999999999999994e-06,
114628
  "loss": 0.0001,
114629
  "step": 19075
114630
  },
114631
  {
114632
- "epoch": 4.95,
114633
- "learning_rate": 3.1826086956521736e-06,
114634
  "loss": 0.0,
114635
  "step": 19076
114636
  },
114637
  {
114638
- "epoch": 4.95,
114639
- "learning_rate": 3.1652173913043473e-06,
114640
- "loss": 0.0001,
114641
  "step": 19077
114642
  },
114643
  {
114644
- "epoch": 4.96,
114645
- "learning_rate": 3.1478260869565214e-06,
114646
  "loss": 0.0,
114647
  "step": 19078
114648
  },
114649
  {
114650
- "epoch": 4.96,
114651
- "learning_rate": 3.1304347826086955e-06,
114652
  "loss": 0.0,
114653
  "step": 19079
114654
  },
114655
  {
114656
- "epoch": 4.96,
114657
- "learning_rate": 3.1130434782608693e-06,
114658
- "loss": 0.0001,
114659
  "step": 19080
114660
  },
114661
  {
114662
- "epoch": 4.96,
114663
- "learning_rate": 3.0956521739130434e-06,
114664
  "loss": 0.0001,
114665
  "step": 19081
114666
  },
114667
  {
114668
- "epoch": 4.96,
114669
- "learning_rate": 3.078260869565217e-06,
114670
- "loss": 0.0032,
114671
  "step": 19082
114672
  },
114673
  {
114674
- "epoch": 4.96,
114675
- "learning_rate": 3.0608695652173912e-06,
114676
- "loss": 0.0001,
114677
  "step": 19083
114678
  },
114679
  {
114680
- "epoch": 4.96,
114681
- "learning_rate": 3.0434782608695645e-06,
114682
- "loss": 0.0001,
114683
  "step": 19084
114684
  },
114685
  {
114686
- "epoch": 4.96,
114687
- "learning_rate": 3.0260869565217387e-06,
114688
- "loss": 0.0,
114689
  "step": 19085
114690
  },
114691
  {
114692
- "epoch": 4.96,
114693
- "learning_rate": 3.008695652173913e-06,
114694
- "loss": 0.0001,
114695
  "step": 19086
114696
  },
114697
  {
114698
- "epoch": 4.96,
114699
- "learning_rate": 2.9913043478260865e-06,
114700
- "loss": 0.0001,
114701
  "step": 19087
114702
  },
114703
  {
114704
- "epoch": 4.96,
114705
- "learning_rate": 2.9739130434782606e-06,
114706
- "loss": 0.0001,
114707
  "step": 19088
114708
  },
114709
  {
114710
- "epoch": 4.96,
114711
- "learning_rate": 2.9565217391304344e-06,
114712
- "loss": 0.0,
114713
  "step": 19089
114714
  },
114715
  {
114716
- "epoch": 4.96,
114717
- "learning_rate": 2.9391304347826085e-06,
114718
  "loss": 0.0002,
114719
  "step": 19090
114720
  },
114721
  {
114722
- "epoch": 4.96,
114723
- "learning_rate": 2.921739130434782e-06,
114724
- "loss": 0.0,
114725
  "step": 19091
114726
  },
114727
  {
114728
- "epoch": 4.96,
114729
- "learning_rate": 2.9043478260869563e-06,
114730
- "loss": 0.0002,
114731
  "step": 19092
114732
  },
114733
  {
114734
- "epoch": 4.96,
114735
- "learning_rate": 2.8869565217391305e-06,
114736
- "loss": 0.0001,
114737
  "step": 19093
114738
  },
114739
  {
114740
- "epoch": 4.96,
114741
- "learning_rate": 2.869565217391304e-06,
114742
- "loss": 0.0001,
114743
  "step": 19094
114744
  },
114745
  {
114746
- "epoch": 4.96,
114747
- "learning_rate": 2.8521739130434783e-06,
114748
  "loss": 0.0001,
114749
  "step": 19095
114750
  },
114751
  {
114752
- "epoch": 4.96,
114753
- "learning_rate": 2.8347826086956516e-06,
114754
- "loss": 0.0,
114755
  "step": 19096
114756
  },
114757
  {
114758
- "epoch": 4.96,
114759
- "learning_rate": 2.8173913043478257e-06,
114760
- "loss": 0.0004,
114761
  "step": 19097
114762
  },
114763
  {
114764
- "epoch": 4.96,
114765
- "learning_rate": 2.8e-06,
114766
- "loss": 0.0001,
114767
  "step": 19098
114768
  },
114769
  {
114770
- "epoch": 4.96,
114771
- "learning_rate": 2.7826086956521736e-06,
114772
- "loss": 0.0,
114773
  "step": 19099
114774
  },
114775
  {
114776
- "epoch": 4.96,
114777
- "learning_rate": 2.7652173913043477e-06,
114778
- "loss": 0.0001,
114779
  "step": 19100
114780
  },
114781
  {
114782
- "epoch": 4.96,
114783
- "learning_rate": 2.7478260869565214e-06,
114784
  "loss": 0.0001,
114785
  "step": 19101
114786
  },
114787
  {
114788
- "epoch": 4.96,
114789
- "learning_rate": 2.7304347826086956e-06,
114790
- "loss": 0.0,
114791
  "step": 19102
114792
  },
114793
  {
114794
- "epoch": 4.96,
114795
- "learning_rate": 2.7130434782608693e-06,
114796
- "loss": 0.0,
114797
  "step": 19103
114798
  },
114799
  {
114800
- "epoch": 4.96,
114801
- "learning_rate": 2.6956521739130434e-06,
114802
- "loss": 0.0001,
114803
  "step": 19104
114804
  },
114805
  {
114806
- "epoch": 4.96,
114807
- "learning_rate": 2.6782608695652175e-06,
114808
  "loss": 0.0001,
114809
  "step": 19105
114810
  },
114811
  {
114812
- "epoch": 4.96,
114813
- "learning_rate": 2.660869565217391e-06,
114814
- "loss": 0.0001,
114815
  "step": 19106
114816
  },
114817
  {
114818
- "epoch": 4.96,
114819
- "learning_rate": 2.643478260869565e-06,
114820
  "loss": 0.0001,
114821
  "step": 19107
114822
  },
114823
  {
114824
- "epoch": 4.96,
114825
- "learning_rate": 2.6260869565217387e-06,
114826
  "loss": 0.0001,
114827
  "step": 19108
114828
  },
114829
  {
114830
- "epoch": 4.96,
114831
- "learning_rate": 2.608695652173913e-06,
114832
  "loss": 0.0001,
114833
  "step": 19109
114834
  },
114835
  {
114836
- "epoch": 4.96,
114837
- "learning_rate": 2.591304347826087e-06,
114838
- "loss": 0.0001,
114839
  "step": 19110
114840
  },
114841
  {
114842
- "epoch": 4.96,
114843
- "learning_rate": 2.5739130434782607e-06,
114844
  "loss": 0.0,
114845
  "step": 19111
114846
  },
114847
  {
114848
- "epoch": 4.96,
114849
- "learning_rate": 2.556521739130435e-06,
114850
  "loss": 0.0001,
114851
  "step": 19112
114852
  },
114853
  {
114854
- "epoch": 4.96,
114855
- "learning_rate": 2.5391304347826085e-06,
114856
  "loss": 0.0001,
114857
  "step": 19113
114858
  },
114859
  {
114860
- "epoch": 4.96,
114861
- "learning_rate": 2.5217391304347826e-06,
114862
- "loss": 0.0,
114863
  "step": 19114
114864
  },
114865
  {
114866
- "epoch": 4.96,
114867
- "learning_rate": 2.504347826086956e-06,
114868
  "loss": 0.0001,
114869
  "step": 19115
114870
  },
114871
  {
114872
- "epoch": 4.97,
114873
- "learning_rate": 2.48695652173913e-06,
114874
- "loss": 0.0,
114875
  "step": 19116
114876
  },
114877
  {
114878
- "epoch": 4.97,
114879
- "learning_rate": 2.469565217391304e-06,
114880
- "loss": 0.0001,
114881
  "step": 19117
114882
  },
114883
  {
114884
- "epoch": 4.97,
114885
- "learning_rate": 2.452173913043478e-06,
114886
- "loss": 0.0,
114887
  "step": 19118
114888
  },
114889
  {
114890
- "epoch": 4.97,
114891
- "learning_rate": 2.434782608695652e-06,
114892
- "loss": 0.0001,
114893
  "step": 19119
114894
  },
114895
  {
114896
- "epoch": 4.97,
114897
- "learning_rate": 2.4173913043478258e-06,
114898
- "loss": 0.0,
114899
  "step": 19120
114900
  },
114901
  {
114902
- "epoch": 4.97,
114903
- "learning_rate": 2.4e-06,
114904
- "loss": 0.0001,
114905
  "step": 19121
114906
  },
114907
  {
114908
- "epoch": 4.97,
114909
- "learning_rate": 2.3826086956521736e-06,
114910
- "loss": 0.0,
114911
  "step": 19122
114912
  },
114913
  {
114914
- "epoch": 4.97,
114915
- "learning_rate": 2.3652173913043477e-06,
114916
- "loss": 0.0092,
114917
  "step": 19123
114918
  },
114919
  {
114920
- "epoch": 4.97,
114921
- "learning_rate": 2.347826086956522e-06,
114922
- "loss": 0.0,
114923
  "step": 19124
114924
  },
114925
  {
114926
- "epoch": 4.97,
114927
- "learning_rate": 2.330434782608695e-06,
114928
- "loss": 0.0001,
114929
  "step": 19125
114930
  },
114931
  {
114932
- "epoch": 4.97,
114933
- "learning_rate": 2.3130434782608693e-06,
114934
- "loss": 0.0,
114935
  "step": 19126
114936
  },
114937
  {
114938
- "epoch": 4.97,
114939
- "learning_rate": 2.2956521739130434e-06,
114940
  "loss": 0.0,
114941
  "step": 19127
114942
  },
114943
  {
114944
- "epoch": 4.97,
114945
- "learning_rate": 2.278260869565217e-06,
114946
- "loss": 0.0,
114947
  "step": 19128
114948
  },
114949
  {
114950
- "epoch": 4.97,
114951
- "learning_rate": 2.2608695652173913e-06,
114952
  "loss": 0.0,
114953
  "step": 19129
114954
  },
114955
  {
114956
- "epoch": 4.97,
114957
- "learning_rate": 2.243478260869565e-06,
114958
- "loss": 0.0,
114959
  "step": 19130
114960
  },
114961
  {
114962
- "epoch": 4.97,
114963
- "learning_rate": 2.2260869565217387e-06,
114964
  "loss": 0.0001,
114965
  "step": 19131
114966
  },
114967
  {
114968
- "epoch": 4.97,
114969
- "learning_rate": 2.208695652173913e-06,
114970
- "loss": 0.0001,
114971
  "step": 19132
114972
  },
114973
  {
114974
- "epoch": 4.97,
114975
- "learning_rate": 2.191304347826087e-06,
114976
- "loss": 0.0,
114977
  "step": 19133
114978
  },
114979
  {
114980
- "epoch": 4.97,
114981
- "learning_rate": 2.1739130434782607e-06,
114982
  "loss": 0.0001,
114983
  "step": 19134
114984
  },
114985
  {
114986
- "epoch": 4.97,
114987
- "learning_rate": 2.1565217391304344e-06,
114988
- "loss": 0.0,
114989
  "step": 19135
114990
  },
114991
  {
114992
- "epoch": 4.97,
114993
- "learning_rate": 2.1391304347826085e-06,
114994
  "loss": 0.0001,
114995
  "step": 19136
114996
  },
114997
  {
114998
- "epoch": 4.97,
114999
- "learning_rate": 2.1217391304347822e-06,
115000
- "loss": 0.0001,
115001
  "step": 19137
115002
  },
115003
  {
115004
- "epoch": 4.97,
115005
- "learning_rate": 2.1043478260869564e-06,
115006
- "loss": 0.0002,
115007
  "step": 19138
115008
  },
115009
  {
115010
- "epoch": 4.97,
115011
- "learning_rate": 2.0869565217391305e-06,
115012
- "loss": 0.0002,
115013
  "step": 19139
115014
  },
115015
  {
115016
- "epoch": 4.97,
115017
- "learning_rate": 2.0695652173913042e-06,
115018
  "loss": 0.0001,
115019
  "step": 19140
115020
  },
115021
  {
115022
- "epoch": 4.97,
115023
- "learning_rate": 2.052173913043478e-06,
115024
- "loss": 0.0,
115025
  "step": 19141
115026
  },
115027
  {
115028
- "epoch": 4.97,
115029
- "learning_rate": 2.034782608695652e-06,
115030
- "loss": 0.0001,
115031
  "step": 19142
115032
  },
115033
  {
115034
- "epoch": 4.97,
115035
- "learning_rate": 2.0173913043478258e-06,
115036
- "loss": 0.0,
115037
  "step": 19143
115038
  },
115039
  {
115040
- "epoch": 4.97,
115041
- "learning_rate": 2e-06,
115042
- "loss": 0.0,
115043
  "step": 19144
115044
  },
115045
  {
115046
- "epoch": 4.97,
115047
- "learning_rate": 1.9826086956521736e-06,
115048
  "loss": 0.0,
115049
  "step": 19145
115050
  },
115051
  {
115052
- "epoch": 4.97,
115053
- "learning_rate": 1.9652173913043478e-06,
115054
- "loss": 0.0,
115055
  "step": 19146
115056
  },
115057
  {
115058
- "epoch": 4.97,
115059
- "learning_rate": 1.9478260869565215e-06,
115060
- "loss": 0.0,
115061
  "step": 19147
115062
  },
115063
  {
115064
- "epoch": 4.97,
115065
- "learning_rate": 1.9304347826086956e-06,
115066
- "loss": 0.0,
115067
  "step": 19148
115068
  },
115069
  {
115070
- "epoch": 4.97,
115071
- "learning_rate": 1.9130434782608693e-06,
115072
  "loss": 0.0001,
115073
  "step": 19149
115074
  },
115075
  {
115076
- "epoch": 4.97,
115077
- "learning_rate": 1.8956521739130432e-06,
115078
- "loss": 0.0,
115079
  "step": 19150
115080
  },
115081
  {
115082
- "epoch": 4.97,
115083
- "learning_rate": 1.8782608695652174e-06,
115084
- "loss": 0.0001,
115085
  "step": 19151
115086
  },
115087
  {
115088
- "epoch": 4.97,
115089
- "learning_rate": 1.8608695652173913e-06,
115090
  "loss": 0.0001,
115091
  "step": 19152
115092
  },
115093
  {
115094
- "epoch": 4.97,
115095
- "learning_rate": 1.843478260869565e-06,
115096
- "loss": 0.0,
115097
  "step": 19153
115098
  },
115099
  {
115100
- "epoch": 4.97,
115101
- "learning_rate": 1.826086956521739e-06,
115102
- "loss": 0.0,
115103
  "step": 19154
115104
  },
115105
  {
115106
- "epoch": 4.98,
115107
- "learning_rate": 1.8086956521739129e-06,
115108
- "loss": 0.0,
115109
  "step": 19155
115110
  },
115111
  {
115112
- "epoch": 4.98,
115113
- "learning_rate": 1.7913043478260868e-06,
115114
  "loss": 0.0,
115115
  "step": 19156
115116
  },
115117
  {
115118
- "epoch": 4.98,
115119
- "learning_rate": 1.773913043478261e-06,
115120
- "loss": 0.0001,
115121
  "step": 19157
115122
  },
115123
  {
115124
- "epoch": 4.98,
115125
- "learning_rate": 1.7565217391304346e-06,
115126
- "loss": 0.0,
115127
  "step": 19158
115128
  },
115129
  {
115130
- "epoch": 4.98,
115131
- "learning_rate": 1.7391304347826085e-06,
115132
- "loss": 0.0,
115133
  "step": 19159
115134
  },
115135
  {
115136
- "epoch": 4.98,
115137
- "learning_rate": 1.7217391304347825e-06,
115138
- "loss": 0.0,
115139
  "step": 19160
115140
  },
115141
  {
115142
- "epoch": 4.98,
115143
- "learning_rate": 1.7043478260869564e-06,
115144
- "loss": 0.0002,
115145
  "step": 19161
115146
  },
115147
  {
115148
- "epoch": 4.98,
115149
- "learning_rate": 1.68695652173913e-06,
115150
  "loss": 0.0001,
115151
  "step": 19162
115152
  },
115153
  {
115154
- "epoch": 4.98,
115155
- "learning_rate": 1.6695652173913042e-06,
115156
- "loss": 0.0001,
115157
  "step": 19163
115158
  },
115159
  {
115160
- "epoch": 4.98,
115161
- "learning_rate": 1.6521739130434782e-06,
115162
- "loss": 0.0001,
115163
  "step": 19164
115164
  },
115165
  {
115166
- "epoch": 4.98,
115167
- "learning_rate": 1.634782608695652e-06,
115168
  "loss": 0.0,
115169
  "step": 19165
115170
  },
115171
  {
115172
- "epoch": 4.98,
115173
- "learning_rate": 1.617391304347826e-06,
115174
  "loss": 0.0001,
115175
  "step": 19166
115176
  },
115177
  {
115178
- "epoch": 4.98,
115179
- "learning_rate": 1.5999999999999997e-06,
115180
- "loss": 0.0,
115181
  "step": 19167
115182
  },
115183
  {
115184
- "epoch": 4.98,
115185
- "learning_rate": 1.5826086956521736e-06,
115186
  "loss": 0.0,
115187
  "step": 19168
115188
  },
115189
  {
115190
- "epoch": 4.98,
115191
- "learning_rate": 1.5652173913043478e-06,
115192
- "loss": 0.0001,
115193
  "step": 19169
115194
  },
115195
  {
115196
- "epoch": 4.98,
115197
- "learning_rate": 1.5478260869565217e-06,
115198
- "loss": 0.0001,
115199
  "step": 19170
115200
  },
115201
  {
115202
- "epoch": 4.98,
115203
- "learning_rate": 1.5304347826086956e-06,
115204
- "loss": 0.0001,
115205
  "step": 19171
115206
  },
115207
  {
115208
- "epoch": 4.98,
115209
- "learning_rate": 1.5130434782608693e-06,
115210
- "loss": 0.0,
115211
  "step": 19172
115212
  },
115213
  {
115214
- "epoch": 4.98,
115215
- "learning_rate": 1.4956521739130433e-06,
115216
- "loss": 0.0,
115217
  "step": 19173
115218
  },
115219
  {
115220
- "epoch": 4.98,
115221
- "learning_rate": 1.4782608695652172e-06,
115222
- "loss": 0.0,
115223
  "step": 19174
115224
  },
115225
  {
115226
- "epoch": 4.98,
115227
- "learning_rate": 1.460869565217391e-06,
115228
- "loss": 0.0001,
115229
  "step": 19175
115230
  },
115231
  {
115232
- "epoch": 4.98,
115233
- "learning_rate": 1.4434782608695652e-06,
115234
- "loss": 0.0001,
115235
  "step": 19176
115236
  },
115237
  {
115238
- "epoch": 4.98,
115239
- "learning_rate": 1.4260869565217392e-06,
115240
- "loss": 0.0001,
115241
  "step": 19177
115242
  },
115243
  {
115244
- "epoch": 4.98,
115245
- "learning_rate": 1.4086956521739129e-06,
115246
- "loss": 0.0,
115247
  "step": 19178
115248
  },
115249
  {
115250
- "epoch": 4.98,
115251
- "learning_rate": 1.3913043478260868e-06,
115252
  "loss": 0.0001,
115253
  "step": 19179
115254
  },
115255
  {
115256
- "epoch": 4.98,
115257
- "learning_rate": 1.3739130434782607e-06,
115258
  "loss": 0.0,
115259
  "step": 19180
115260
  },
115261
  {
115262
- "epoch": 4.98,
115263
- "learning_rate": 1.3565217391304346e-06,
115264
  "loss": 0.0001,
115265
  "step": 19181
115266
  },
115267
  {
115268
- "epoch": 4.98,
115269
- "learning_rate": 1.3391304347826088e-06,
115270
- "loss": 0.0013,
115271
  "step": 19182
115272
  },
115273
  {
115274
- "epoch": 4.98,
115275
- "learning_rate": 1.3217391304347825e-06,
115276
- "loss": 0.0,
115277
  "step": 19183
115278
  },
115279
  {
115280
- "epoch": 4.98,
115281
- "learning_rate": 1.3043478260869564e-06,
115282
- "loss": 0.0,
115283
  "step": 19184
115284
  },
115285
  {
115286
- "epoch": 4.98,
115287
- "learning_rate": 1.2869565217391303e-06,
115288
- "loss": 0.0001,
115289
  "step": 19185
115290
  },
115291
  {
115292
- "epoch": 4.98,
115293
- "learning_rate": 1.2695652173913043e-06,
115294
  "loss": 0.0,
115295
  "step": 19186
115296
  },
115297
  {
115298
- "epoch": 4.98,
115299
- "learning_rate": 1.252173913043478e-06,
115300
  "loss": 0.0,
115301
  "step": 19187
115302
  },
115303
  {
115304
- "epoch": 4.98,
115305
- "learning_rate": 1.234782608695652e-06,
115306
- "loss": 0.0001,
115307
  "step": 19188
115308
  },
115309
  {
115310
- "epoch": 4.98,
115311
- "learning_rate": 1.217391304347826e-06,
115312
  "loss": 0.0001,
115313
  "step": 19189
115314
  },
115315
  {
115316
- "epoch": 4.98,
115317
- "learning_rate": 1.2e-06,
115318
- "loss": 0.0001,
115319
  "step": 19190
115320
  },
115321
  {
115322
- "epoch": 4.98,
115323
- "learning_rate": 1.1826086956521739e-06,
115324
- "loss": 0.0,
115325
  "step": 19191
115326
  },
115327
  {
115328
- "epoch": 4.98,
115329
- "learning_rate": 1.1652173913043476e-06,
115330
- "loss": 0.0783,
115331
  "step": 19192
115332
  },
115333
  {
115334
- "epoch": 4.99,
115335
- "learning_rate": 1.1478260869565217e-06,
115336
- "loss": 0.0001,
115337
  "step": 19193
115338
  },
115339
  {
115340
- "epoch": 4.99,
115341
- "learning_rate": 1.1304347826086956e-06,
115342
  "loss": 0.0001,
115343
  "step": 19194
115344
  },
115345
  {
115346
- "epoch": 4.99,
115347
- "learning_rate": 1.1130434782608693e-06,
115348
- "loss": 0.0002,
115349
  "step": 19195
115350
  },
115351
  {
115352
- "epoch": 4.99,
115353
- "learning_rate": 1.0956521739130435e-06,
115354
- "loss": 0.0001,
115355
  "step": 19196
115356
  },
115357
  {
115358
- "epoch": 4.99,
115359
- "learning_rate": 1.0782608695652172e-06,
115360
- "loss": 0.0,
115361
  "step": 19197
115362
  },
115363
  {
115364
- "epoch": 4.99,
115365
- "learning_rate": 1.0608695652173911e-06,
115366
- "loss": 0.0001,
115367
  "step": 19198
115368
  },
115369
  {
115370
- "epoch": 4.99,
115371
- "learning_rate": 1.0434782608695653e-06,
115372
- "loss": 0.0001,
115373
  "step": 19199
115374
  },
115375
  {
115376
- "epoch": 4.99,
115377
- "learning_rate": 1.026086956521739e-06,
115378
- "loss": 0.0,
115379
  "step": 19200
115380
  },
115381
  {
115382
- "epoch": 4.99,
115383
- "learning_rate": 1.0086956521739129e-06,
115384
- "loss": 0.0,
115385
  "step": 19201
115386
  },
115387
  {
115388
- "epoch": 4.99,
115389
- "learning_rate": 9.913043478260868e-07,
115390
- "loss": 0.0001,
115391
  "step": 19202
115392
  },
115393
  {
115394
- "epoch": 4.99,
115395
- "learning_rate": 9.739130434782607e-07,
115396
- "loss": 0.0001,
115397
  "step": 19203
115398
  },
115399
  {
115400
- "epoch": 4.99,
115401
- "learning_rate": 9.565217391304347e-07,
115402
- "loss": 0.0001,
115403
  "step": 19204
115404
  },
115405
  {
115406
- "epoch": 4.99,
115407
- "learning_rate": 9.391304347826087e-07,
115408
  "loss": 0.0,
115409
  "step": 19205
115410
  },
115411
  {
115412
- "epoch": 4.99,
115413
- "learning_rate": 9.217391304347825e-07,
115414
- "loss": 0.0001,
115415
  "step": 19206
115416
  },
115417
  {
115418
- "epoch": 4.99,
115419
- "learning_rate": 9.043478260869564e-07,
115420
- "loss": 0.2202,
115421
  "step": 19207
115422
  },
115423
  {
115424
- "epoch": 4.99,
115425
- "learning_rate": 8.869565217391305e-07,
115426
- "loss": 0.0,
115427
  "step": 19208
115428
  },
115429
  {
115430
- "epoch": 4.99,
115431
- "learning_rate": 8.695652173913043e-07,
115432
- "loss": 0.0001,
115433
  "step": 19209
115434
  },
115435
  {
115436
- "epoch": 4.99,
115437
- "learning_rate": 8.521739130434782e-07,
115438
  "loss": 0.0001,
115439
  "step": 19210
115440
  },
115441
  {
115442
- "epoch": 4.99,
115443
- "learning_rate": 8.347826086956521e-07,
115444
  "loss": 0.0001,
115445
  "step": 19211
115446
  },
115447
  {
115448
- "epoch": 4.99,
115449
- "learning_rate": 8.17391304347826e-07,
115450
  "loss": 0.0001,
115451
  "step": 19212
115452
  },
115453
  {
115454
- "epoch": 4.99,
115455
- "learning_rate": 7.999999999999999e-07,
115456
  "loss": 0.0001,
115457
  "step": 19213
115458
  },
115459
  {
115460
- "epoch": 4.99,
115461
- "learning_rate": 7.826086956521739e-07,
115462
- "loss": 0.0,
115463
  "step": 19214
115464
  },
115465
  {
115466
- "epoch": 4.99,
115467
- "learning_rate": 7.652173913043478e-07,
115468
- "loss": 0.0,
115469
  "step": 19215
115470
  },
115471
  {
115472
- "epoch": 4.99,
115473
- "learning_rate": 7.478260869565216e-07,
115474
- "loss": 0.0001,
115475
  "step": 19216
115476
  },
115477
  {
115478
- "epoch": 4.99,
115479
- "learning_rate": 7.304347826086956e-07,
115480
  "loss": 0.0001,
115481
  "step": 19217
115482
  },
115483
  {
115484
- "epoch": 4.99,
115485
- "learning_rate": 7.130434782608696e-07,
115486
- "loss": 0.0001,
115487
  "step": 19218
115488
  },
115489
  {
115490
- "epoch": 4.99,
115491
- "learning_rate": 6.956521739130434e-07,
115492
- "loss": 0.0001,
115493
  "step": 19219
115494
  },
115495
  {
115496
- "epoch": 4.99,
115497
- "learning_rate": 6.782608695652173e-07,
115498
  "loss": 0.0,
115499
  "step": 19220
115500
  },
115501
  {
115502
- "epoch": 4.99,
115503
- "learning_rate": 6.608695652173912e-07,
115504
- "loss": 0.0,
115505
  "step": 19221
115506
  },
115507
  {
115508
- "epoch": 4.99,
115509
- "learning_rate": 6.434782608695652e-07,
115510
- "loss": 0.0001,
115511
  "step": 19222
115512
  },
115513
  {
115514
- "epoch": 4.99,
115515
- "learning_rate": 6.26086956521739e-07,
115516
- "loss": 0.0001,
115517
  "step": 19223
115518
  },
115519
  {
115520
- "epoch": 4.99,
115521
- "learning_rate": 6.08695652173913e-07,
115522
- "loss": 0.0001,
115523
  "step": 19224
115524
  },
115525
  {
115526
- "epoch": 4.99,
115527
- "learning_rate": 5.913043478260869e-07,
115528
- "loss": 0.0001,
115529
  "step": 19225
115530
  },
115531
  {
115532
- "epoch": 4.99,
115533
- "learning_rate": 5.739130434782609e-07,
115534
- "loss": 0.0001,
115535
  "step": 19226
115536
  },
115537
  {
115538
- "epoch": 4.99,
115539
- "learning_rate": 5.565217391304347e-07,
115540
- "loss": 0.0001,
115541
  "step": 19227
115542
  },
115543
  {
115544
- "epoch": 4.99,
115545
- "learning_rate": 5.391304347826086e-07,
115546
- "loss": 0.0,
115547
  "step": 19228
115548
  },
115549
  {
115550
- "epoch": 4.99,
115551
- "learning_rate": 5.217391304347826e-07,
115552
  "loss": 0.0,
115553
  "step": 19229
115554
  },
115555
  {
115556
- "epoch": 4.99,
115557
- "learning_rate": 5.043478260869564e-07,
115558
  "loss": 0.0001,
115559
  "step": 19230
115560
  },
115561
  {
115562
- "epoch": 4.99,
115563
- "learning_rate": 4.869565217391304e-07,
115564
- "loss": 0.0001,
115565
  "step": 19231
115566
  },
115567
  {
115568
- "epoch": 5.0,
115569
- "learning_rate": 4.6956521739130434e-07,
115570
- "loss": 0.0002,
115571
  "step": 19232
115572
  },
115573
  {
115574
- "epoch": 5.0,
115575
- "learning_rate": 4.521739130434782e-07,
115576
- "loss": 0.0,
115577
  "step": 19233
115578
  },
115579
  {
115580
- "epoch": 5.0,
115581
- "learning_rate": 4.3478260869565214e-07,
115582
  "loss": 0.0001,
115583
  "step": 19234
115584
  },
115585
  {
115586
- "epoch": 5.0,
115587
- "learning_rate": 4.1739130434782606e-07,
115588
  "loss": 0.0001,
115589
  "step": 19235
115590
  },
115591
  {
115592
- "epoch": 5.0,
115593
- "learning_rate": 3.9999999999999993e-07,
115594
- "loss": 0.0,
115595
  "step": 19236
115596
  },
115597
  {
115598
- "epoch": 5.0,
115599
- "learning_rate": 3.826086956521739e-07,
115600
- "loss": 0.0001,
115601
  "step": 19237
115602
  },
115603
  {
115604
- "epoch": 5.0,
115605
- "learning_rate": 3.652173913043478e-07,
115606
  "loss": 0.0001,
115607
  "step": 19238
115608
  },
115609
  {
115610
- "epoch": 5.0,
115611
- "learning_rate": 3.478260869565217e-07,
115612
  "loss": 0.0001,
115613
  "step": 19239
115614
  },
115615
  {
115616
- "epoch": 5.0,
115617
- "learning_rate": 3.304347826086956e-07,
115618
  "loss": 0.0,
115619
  "step": 19240
115620
  },
115621
  {
115622
- "epoch": 5.0,
115623
- "learning_rate": 3.130434782608695e-07,
115624
- "loss": 0.0,
115625
  "step": 19241
115626
  },
115627
  {
115628
- "epoch": 5.0,
115629
- "learning_rate": 2.9565217391304347e-07,
115630
  "loss": 0.0,
115631
  "step": 19242
115632
  },
115633
  {
115634
- "epoch": 5.0,
115635
- "learning_rate": 2.7826086956521734e-07,
115636
- "loss": 0.0,
115637
  "step": 19243
115638
  },
115639
  {
115640
- "epoch": 5.0,
115641
- "learning_rate": 2.608695652173913e-07,
115642
- "loss": 0.0001,
115643
  "step": 19244
115644
  },
115645
  {
115646
- "epoch": 5.0,
115647
- "learning_rate": 2.434782608695652e-07,
115648
- "loss": 0.0001,
115649
  "step": 19245
115650
  },
115651
  {
115652
- "epoch": 5.0,
115653
- "learning_rate": 2.260869565217391e-07,
115654
- "loss": 0.0,
115655
  "step": 19246
115656
  },
115657
  {
115658
- "epoch": 5.0,
115659
- "learning_rate": 2.0869565217391303e-07,
115660
- "loss": 0.0001,
115661
  "step": 19247
115662
  },
115663
  {
115664
- "epoch": 5.0,
115665
- "learning_rate": 1.9130434782608695e-07,
115666
  "loss": 0.0001,
115667
  "step": 19248
115668
  },
115669
  {
115670
- "epoch": 5.0,
115671
- "learning_rate": 1.7391304347826085e-07,
115672
- "loss": 0.0001,
115673
  "step": 19249
115674
  },
115675
  {
115676
- "epoch": 5.0,
115677
- "learning_rate": 1.5652173913043475e-07,
115678
- "loss": 0.0001,
115679
  "step": 19250
115680
  },
115681
  {
115682
- "epoch": 5.0,
115683
- "step": 19250,
115684
- "total_flos": 4.686304027304379e+20,
115685
- "train_loss": 0.1620459199016551,
115686
- "train_runtime": 43631.9674,
115687
- "train_samples_per_second": 28.237,
115688
- "train_steps_per_second": 0.441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115689
  }
115690
  ],
115691
- "max_steps": 19250,
115692
  "num_train_epochs": 5,
115693
- "total_flos": 4.686304027304379e+20,
115694
  "trial_name": null,
115695
  "trial_params": null
115696
  }
1
  {
2
  "best_metric": 0.7236466414789606,
3
  "best_model_checkpoint": "xtreme_s_xlsr_300m_fleurs_langid_truncated/checkpoint-19000",
4
+ "epoch": 4.066216567125422,
5
+ "global_step": 19255,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
114179
  "step": 19000
114180
  },
114181
  {
114182
+ "epoch": 4.0,
114183
+ "learning_rate": 4.572587655751956e-06,
114184
  "loss": 0.0001,
114185
  "step": 19001
114186
  },
114187
  {
114188
+ "epoch": 4.0,
114189
+ "learning_rate": 4.555201390901187e-06,
114190
+ "loss": 0.0002,
114191
  "step": 19002
114192
  },
114193
  {
114194
+ "epoch": 4.0,
114195
+ "learning_rate": 4.53781512605042e-06,
114196
  "loss": 0.0001,
114197
  "step": 19003
114198
  },
114199
  {
114200
+ "epoch": 4.0,
114201
+ "learning_rate": 4.520428861199652e-06,
114202
+ "loss": 0.0002,
114203
  "step": 19004
114204
  },
114205
  {
114206
+ "epoch": 4.0,
114207
+ "learning_rate": 4.503042596348884e-06,
114208
+ "loss": 0.0001,
114209
  "step": 19005
114210
  },
114211
  {
114212
+ "epoch": 4.0,
114213
+ "learning_rate": 4.4856563314981165e-06,
114214
+ "loss": 0.0001,
114215
  "step": 19006
114216
  },
114217
  {
114218
+ "epoch": 4.0,
114219
+ "learning_rate": 4.468270066647348e-06,
114220
+ "loss": 0.0,
114221
  "step": 19007
114222
  },
114223
  {
114224
+ "epoch": 4.0,
114225
+ "learning_rate": 4.45088380179658e-06,
114226
+ "loss": 0.0001,
114227
  "step": 19008
114228
  },
114229
  {
114230
+ "epoch": 4.0,
114231
+ "learning_rate": 4.433497536945813e-06,
114232
+ "loss": 0.0,
114233
  "step": 19009
114234
  },
114235
  {
114236
+ "epoch": 4.0,
114237
+ "learning_rate": 4.416111272095045e-06,
114238
+ "loss": 0.0002,
114239
  "step": 19010
114240
  },
114241
  {
114242
+ "epoch": 4.0,
114243
+ "learning_rate": 4.398725007244277e-06,
114244
+ "loss": 0.0001,
114245
  "step": 19011
114246
  },
114247
  {
114248
+ "epoch": 4.0,
114249
+ "learning_rate": 4.381338742393509e-06,
114250
+ "loss": 0.0,
114251
  "step": 19012
114252
  },
114253
  {
114254
+ "epoch": 4.0,
114255
+ "learning_rate": 4.363952477542741e-06,
114256
+ "loss": 0.0,
114257
  "step": 19013
114258
  },
114259
  {
114260
+ "epoch": 4.0,
114261
+ "learning_rate": 4.346566212691972e-06,
114262
  "loss": 0.0001,
114263
  "step": 19014
114264
  },
114265
  {
114266
+ "epoch": 4.0,
114267
+ "learning_rate": 4.3291799478412056e-06,
114268
+ "loss": 0.0002,
114269
  "step": 19015
114270
  },
114271
  {
114272
+ "epoch": 4.0,
114273
+ "learning_rate": 4.311793682990437e-06,
114274
  "loss": 0.0001,
114275
  "step": 19016
114276
  },
114277
  {
114278
+ "epoch": 4.0,
114279
+ "learning_rate": 4.294407418139669e-06,
114280
+ "loss": 0.0,
114281
  "step": 19017
114282
  },
114283
  {
114284
+ "epoch": 4.0,
114285
+ "learning_rate": 4.277021153288902e-06,
114286
+ "loss": 0.0,
114287
  "step": 19018
114288
  },
114289
  {
114290
+ "epoch": 4.0,
114291
+ "learning_rate": 4.259634888438133e-06,
114292
+ "loss": 0.0001,
114293
  "step": 19019
114294
  },
114295
  {
114296
+ "epoch": 4.01,
114297
+ "learning_rate": 4.242248623587365e-06,
114298
+ "loss": 0.0001,
114299
  "step": 19020
114300
  },
114301
  {
114302
+ "epoch": 4.01,
114303
+ "learning_rate": 4.224862358736598e-06,
114304
+ "loss": 0.0002,
114305
  "step": 19021
114306
  },
114307
  {
114308
+ "epoch": 4.01,
114309
+ "learning_rate": 4.20747609388583e-06,
114310
  "loss": 0.0001,
114311
  "step": 19022
114312
  },
114313
  {
114314
+ "epoch": 4.01,
114315
+ "learning_rate": 4.190089829035061e-06,
114316
  "loss": 0.0001,
114317
  "step": 19023
114318
  },
114319
  {
114320
+ "epoch": 4.01,
114321
+ "learning_rate": 4.172703564184294e-06,
114322
  "loss": 0.0001,
114323
  "step": 19024
114324
  },
114325
  {
114326
+ "epoch": 4.01,
114327
+ "learning_rate": 4.155317299333526e-06,
114328
  "loss": 0.0001,
114329
  "step": 19025
114330
  },
114331
  {
114332
+ "epoch": 4.01,
114333
+ "learning_rate": 4.137931034482758e-06,
114334
+ "loss": 0.0001,
114335
  "step": 19026
114336
  },
114337
  {
114338
+ "epoch": 4.01,
114339
+ "learning_rate": 4.120544769631991e-06,
114340
+ "loss": 0.002,
114341
  "step": 19027
114342
  },
114343
  {
114344
+ "epoch": 4.01,
114345
+ "learning_rate": 4.103158504781222e-06,
114346
  "loss": 0.0001,
114347
  "step": 19028
114348
  },
114349
  {
114350
+ "epoch": 4.01,
114351
+ "learning_rate": 4.085772239930454e-06,
114352
  "loss": 0.0,
114353
  "step": 19029
114354
  },
114355
  {
114356
+ "epoch": 4.01,
114357
+ "learning_rate": 4.068385975079687e-06,
114358
  "loss": 0.0001,
114359
  "step": 19030
114360
  },
114361
  {
114362
+ "epoch": 4.01,
114363
+ "learning_rate": 4.050999710228919e-06,
114364
+ "loss": 0.0001,
114365
  "step": 19031
114366
  },
114367
  {
114368
+ "epoch": 4.01,
114369
+ "learning_rate": 4.033613445378151e-06,
114370
+ "loss": 0.0,
114371
  "step": 19032
114372
  },
114373
  {
114374
+ "epoch": 4.01,
114375
+ "learning_rate": 4.016227180527383e-06,
114376
  "loss": 0.0,
114377
  "step": 19033
114378
  },
114379
  {
114380
+ "epoch": 4.01,
114381
+ "learning_rate": 3.998840915676615e-06,
114382
  "loss": 0.0001,
114383
  "step": 19034
114384
  },
114385
  {
114386
+ "epoch": 4.01,
114387
+ "learning_rate": 3.981454650825847e-06,
114388
  "loss": 0.0001,
114389
  "step": 19035
114390
  },
114391
  {
114392
+ "epoch": 4.01,
114393
+ "learning_rate": 3.96406838597508e-06,
114394
+ "loss": 0.0001,
114395
  "step": 19036
114396
  },
114397
  {
114398
+ "epoch": 4.01,
114399
+ "learning_rate": 3.946682121124311e-06,
114400
  "loss": 0.0001,
114401
  "step": 19037
114402
  },
114403
  {
114404
+ "epoch": 4.01,
114405
+ "learning_rate": 3.929295856273543e-06,
114406
  "loss": 0.0001,
114407
  "step": 19038
114408
  },
114409
  {
114410
+ "epoch": 4.01,
114411
+ "learning_rate": 3.911909591422776e-06,
114412
+ "loss": 0.0007,
114413
  "step": 19039
114414
  },
114415
  {
114416
+ "epoch": 4.01,
114417
+ "learning_rate": 3.894523326572008e-06,
114418
+ "loss": 0.0,
114419
  "step": 19040
114420
  },
114421
  {
114422
+ "epoch": 4.01,
114423
+ "learning_rate": 3.87713706172124e-06,
114424
+ "loss": 0.0,
114425
  "step": 19041
114426
  },
114427
  {
114428
+ "epoch": 4.01,
114429
+ "learning_rate": 3.859750796870472e-06,
114430
  "loss": 0.0001,
114431
  "step": 19042
114432
  },
114433
  {
114434
+ "epoch": 4.01,
114435
+ "learning_rate": 3.842364532019704e-06,
114436
  "loss": 0.0001,
114437
  "step": 19043
114438
  },
114439
  {
114440
+ "epoch": 4.01,
114441
+ "learning_rate": 3.824978267168936e-06,
114442
+ "loss": 0.0002,
114443
  "step": 19044
114444
  },
114445
  {
114446
+ "epoch": 4.01,
114447
+ "learning_rate": 3.807592002318168e-06,
114448
  "loss": 0.0001,
114449
  "step": 19045
114450
  },
114451
  {
114452
+ "epoch": 4.01,
114453
+ "learning_rate": 3.7902057374674005e-06,
114454
  "loss": 0.0001,
114455
  "step": 19046
114456
  },
114457
  {
114458
+ "epoch": 4.01,
114459
+ "learning_rate": 3.7728194726166324e-06,
114460
+ "loss": 0.0001,
114461
  "step": 19047
114462
  },
114463
  {
114464
+ "epoch": 4.01,
114465
+ "learning_rate": 3.7554332077658647e-06,
114466
  "loss": 0.0,
114467
  "step": 19048
114468
  },
114469
  {
114470
+ "epoch": 4.01,
114471
+ "learning_rate": 3.7380469429150966e-06,
114472
  "loss": 0.0,
114473
  "step": 19049
114474
  },
114475
  {
114476
+ "epoch": 4.01,
114477
+ "learning_rate": 3.720660678064329e-06,
114478
+ "loss": 0.0,
114479
  "step": 19050
114480
  },
114481
  {
114482
+ "epoch": 4.01,
114483
+ "learning_rate": 3.7032744132135607e-06,
114484
+ "loss": 0.0001,
114485
  "step": 19051
114486
  },
114487
  {
114488
+ "epoch": 4.01,
114489
+ "learning_rate": 3.685888148362793e-06,
114490
+ "loss": 0.0001,
114491
  "step": 19052
114492
  },
114493
  {
114494
+ "epoch": 4.01,
114495
+ "learning_rate": 3.6685018835120253e-06,
114496
+ "loss": 0.0001,
114497
  "step": 19053
114498
  },
114499
  {
114500
+ "epoch": 4.01,
114501
+ "learning_rate": 3.6511156186612572e-06,
114502
+ "loss": 0.0,
114503
  "step": 19054
114504
  },
114505
  {
114506
+ "epoch": 4.01,
114507
+ "learning_rate": 3.6337293538104895e-06,
114508
+ "loss": 0.0001,
114509
  "step": 19055
114510
  },
114511
  {
114512
+ "epoch": 4.01,
114513
+ "learning_rate": 3.6163430889597214e-06,
114514
  "loss": 0.0001,
114515
  "step": 19056
114516
  },
114517
  {
114518
+ "epoch": 4.01,
114519
+ "learning_rate": 3.5989568241089537e-06,
114520
  "loss": 0.0001,
114521
  "step": 19057
114522
  },
114523
  {
114524
+ "epoch": 4.02,
114525
+ "learning_rate": 3.5815705592581856e-06,
114526
  "loss": 0.0001,
114527
  "step": 19058
114528
  },
114529
  {
114530
+ "epoch": 4.02,
114531
+ "learning_rate": 3.564184294407418e-06,
114532
+ "loss": 0.0,
114533
  "step": 19059
114534
  },
114535
  {
114536
+ "epoch": 4.02,
114537
+ "learning_rate": 3.54679802955665e-06,
114538
+ "loss": 0.0002,
114539
  "step": 19060
114540
  },
114541
  {
114542
+ "epoch": 4.02,
114543
+ "learning_rate": 3.529411764705882e-06,
114544
+ "loss": 0.0001,
114545
  "step": 19061
114546
  },
114547
  {
114548
+ "epoch": 4.02,
114549
+ "learning_rate": 3.5120254998551143e-06,
114550
+ "loss": 0.0832,
114551
  "step": 19062
114552
  },
114553
  {
114554
+ "epoch": 4.02,
114555
+ "learning_rate": 3.4946392350043462e-06,
114556
+ "loss": 0.0,
114557
  "step": 19063
114558
  },
114559
  {
114560
+ "epoch": 4.02,
114561
+ "learning_rate": 3.4772529701535785e-06,
114562
+ "loss": 0.0001,
114563
  "step": 19064
114564
  },
114565
  {
114566
+ "epoch": 4.02,
114567
+ "learning_rate": 3.4598667053028104e-06,
114568
  "loss": 0.0001,
114569
  "step": 19065
114570
  },
114571
  {
114572
+ "epoch": 4.02,
114573
+ "learning_rate": 3.4424804404520427e-06,
114574
+ "loss": 0.0002,
114575
  "step": 19066
114576
  },
114577
  {
114578
+ "epoch": 4.02,
114579
+ "learning_rate": 3.425094175601275e-06,
114580
  "loss": 0.0001,
114581
  "step": 19067
114582
  },
114583
  {
114584
+ "epoch": 4.02,
114585
+ "learning_rate": 3.407707910750507e-06,
114586
+ "loss": 0.0,
114587
  "step": 19068
114588
  },
114589
  {
114590
+ "epoch": 4.02,
114591
+ "learning_rate": 3.390321645899739e-06,
114592
  "loss": 0.0001,
114593
  "step": 19069
114594
  },
114595
  {
114596
+ "epoch": 4.02,
114597
+ "learning_rate": 3.372935381048971e-06,
114598
  "loss": 0.0,
114599
  "step": 19070
114600
  },
114601
  {
114602
+ "epoch": 4.02,
114603
+ "learning_rate": 3.3555491161982034e-06,
114604
+ "loss": 0.0001,
114605
  "step": 19071
114606
  },
114607
  {
114608
+ "epoch": 4.02,
114609
+ "learning_rate": 3.3381628513474352e-06,
114610
  "loss": 0.0,
114611
  "step": 19072
114612
  },
114613
  {
114614
+ "epoch": 4.02,
114615
+ "learning_rate": 3.3207765864966675e-06,
114616
  "loss": 0.0001,
114617
  "step": 19073
114618
  },
114619
  {
114620
+ "epoch": 4.02,
114621
+ "learning_rate": 3.3033903216459e-06,
114622
+ "loss": 0.0,
114623
  "step": 19074
114624
  },
114625
  {
114626
+ "epoch": 4.02,
114627
+ "learning_rate": 3.2860040567951317e-06,
114628
  "loss": 0.0001,
114629
  "step": 19075
114630
  },
114631
  {
114632
+ "epoch": 4.02,
114633
+ "learning_rate": 3.268617791944364e-06,
114634
  "loss": 0.0,
114635
  "step": 19076
114636
  },
114637
  {
114638
+ "epoch": 4.02,
114639
+ "learning_rate": 3.2512315270935955e-06,
114640
+ "loss": 0.0,
114641
  "step": 19077
114642
  },
114643
  {
114644
+ "epoch": 4.02,
114645
+ "learning_rate": 3.233845262242828e-06,
114646
  "loss": 0.0,
114647
  "step": 19078
114648
  },
114649
  {
114650
+ "epoch": 4.02,
114651
+ "learning_rate": 3.2164589973920596e-06,
114652
  "loss": 0.0,
114653
  "step": 19079
114654
  },
114655
  {
114656
+ "epoch": 4.02,
114657
+ "learning_rate": 3.199072732541292e-06,
114658
+ "loss": 0.0,
114659
  "step": 19080
114660
  },
114661
  {
114662
+ "epoch": 4.02,
114663
+ "learning_rate": 3.1816864676905247e-06,
114664
  "loss": 0.0001,
114665
  "step": 19081
114666
  },
114667
  {
114668
+ "epoch": 4.02,
114669
+ "learning_rate": 3.164300202839756e-06,
114670
+ "loss": 0.0,
114671
  "step": 19082
114672
  },
114673
  {
114674
+ "epoch": 4.02,
114675
+ "learning_rate": 3.1469139379889884e-06,
114676
+ "loss": 0.0,
114677
  "step": 19083
114678
  },
114679
  {
114680
+ "epoch": 4.02,
114681
+ "learning_rate": 3.1295276731382203e-06,
114682
+ "loss": 0.0,
114683
  "step": 19084
114684
  },
114685
  {
114686
+ "epoch": 4.02,
114687
+ "learning_rate": 3.1121414082874526e-06,
114688
+ "loss": 0.0002,
114689
  "step": 19085
114690
  },
114691
  {
114692
+ "epoch": 4.02,
114693
+ "learning_rate": 3.0947551434366845e-06,
114694
+ "loss": 0.0,
114695
  "step": 19086
114696
  },
114697
  {
114698
+ "epoch": 4.02,
114699
+ "learning_rate": 3.0773688785859168e-06,
114700
+ "loss": 0.0002,
114701
  "step": 19087
114702
  },
114703
  {
114704
+ "epoch": 4.02,
114705
+ "learning_rate": 3.059982613735149e-06,
114706
+ "loss": 0.0,
114707
  "step": 19088
114708
  },
114709
  {
114710
+ "epoch": 4.02,
114711
+ "learning_rate": 3.042596348884381e-06,
114712
+ "loss": 0.0002,
114713
  "step": 19089
114714
  },
114715
  {
114716
+ "epoch": 4.02,
114717
+ "learning_rate": 3.0252100840336132e-06,
114718
  "loss": 0.0002,
114719
  "step": 19090
114720
  },
114721
  {
114722
+ "epoch": 4.02,
114723
+ "learning_rate": 3.007823819182845e-06,
114724
+ "loss": 0.0002,
114725
  "step": 19091
114726
  },
114727
  {
114728
+ "epoch": 4.02,
114729
+ "learning_rate": 2.9904375543320774e-06,
114730
+ "loss": 0.0001,
114731
  "step": 19092
114732
  },
114733
  {
114734
+ "epoch": 4.02,
114735
+ "learning_rate": 2.9730512894813093e-06,
114736
+ "loss": 0.0002,
114737
  "step": 19093
114738
  },
114739
  {
114740
+ "epoch": 4.02,
114741
+ "learning_rate": 2.9556650246305416e-06,
114742
+ "loss": 0.0017,
114743
  "step": 19094
114744
  },
114745
  {
114746
+ "epoch": 4.02,
114747
+ "learning_rate": 2.938278759779774e-06,
114748
  "loss": 0.0001,
114749
  "step": 19095
114750
  },
114751
  {
114752
+ "epoch": 4.02,
114753
+ "learning_rate": 2.9208924949290058e-06,
114754
+ "loss": 0.0001,
114755
  "step": 19096
114756
  },
114757
  {
114758
+ "epoch": 4.03,
114759
+ "learning_rate": 2.903506230078238e-06,
114760
+ "loss": 0.0002,
114761
  "step": 19097
114762
  },
114763
  {
114764
+ "epoch": 4.03,
114765
+ "learning_rate": 2.88611996522747e-06,
114766
+ "loss": 0.0021,
114767
  "step": 19098
114768
  },
114769
  {
114770
+ "epoch": 4.03,
114771
+ "learning_rate": 2.8687337003767022e-06,
114772
+ "loss": 0.0001,
114773
  "step": 19099
114774
  },
114775
  {
114776
+ "epoch": 4.03,
114777
+ "learning_rate": 2.851347435525934e-06,
114778
+ "loss": 0.0002,
114779
  "step": 19100
114780
  },
114781
  {
114782
+ "epoch": 4.03,
114783
+ "learning_rate": 2.8339611706751664e-06,
114784
  "loss": 0.0001,
114785
  "step": 19101
114786
  },
114787
  {
114788
+ "epoch": 4.03,
114789
+ "learning_rate": 2.8165749058243987e-06,
114790
+ "loss": 0.0001,
114791
  "step": 19102
114792
  },
114793
  {
114794
+ "epoch": 4.03,
114795
+ "learning_rate": 2.7991886409736306e-06,
114796
+ "loss": 0.0001,
114797
  "step": 19103
114798
  },
114799
  {
114800
+ "epoch": 4.03,
114801
+ "learning_rate": 2.781802376122863e-06,
114802
+ "loss": 0.0,
114803
  "step": 19104
114804
  },
114805
  {
114806
+ "epoch": 4.03,
114807
+ "learning_rate": 2.7644161112720948e-06,
114808
  "loss": 0.0001,
114809
  "step": 19105
114810
  },
114811
  {
114812
+ "epoch": 4.03,
114813
+ "learning_rate": 2.747029846421327e-06,
114814
+ "loss": 0.0,
114815
  "step": 19106
114816
  },
114817
  {
114818
+ "epoch": 4.03,
114819
+ "learning_rate": 2.729643581570559e-06,
114820
  "loss": 0.0001,
114821
  "step": 19107
114822
  },
114823
  {
114824
+ "epoch": 4.03,
114825
+ "learning_rate": 2.7122573167197913e-06,
114826
  "loss": 0.0001,
114827
  "step": 19108
114828
  },
114829
  {
114830
+ "epoch": 4.03,
114831
+ "learning_rate": 2.6948710518690236e-06,
114832
  "loss": 0.0001,
114833
  "step": 19109
114834
  },
114835
  {
114836
+ "epoch": 4.03,
114837
+ "learning_rate": 2.6774847870182554e-06,
114838
+ "loss": 0.0,
114839
  "step": 19110
114840
  },
114841
  {
114842
+ "epoch": 4.03,
114843
+ "learning_rate": 2.6600985221674877e-06,
114844
  "loss": 0.0,
114845
  "step": 19111
114846
  },
114847
  {
114848
+ "epoch": 4.03,
114849
+ "learning_rate": 2.6427122573167196e-06,
114850
  "loss": 0.0001,
114851
  "step": 19112
114852
  },
114853
  {
114854
+ "epoch": 4.03,
114855
+ "learning_rate": 2.625325992465952e-06,
114856
  "loss": 0.0001,
114857
  "step": 19113
114858
  },
114859
  {
114860
+ "epoch": 4.03,
114861
+ "learning_rate": 2.6079397276151838e-06,
114862
+ "loss": 0.0001,
114863
  "step": 19114
114864
  },
114865
  {
114866
+ "epoch": 4.03,
114867
+ "learning_rate": 2.590553462764416e-06,
114868
  "loss": 0.0001,
114869
  "step": 19115
114870
  },
114871
  {
114872
+ "epoch": 4.03,
114873
+ "learning_rate": 2.5731671979136475e-06,
114874
+ "loss": 0.0004,
114875
  "step": 19116
114876
  },
114877
  {
114878
+ "epoch": 4.03,
114879
+ "learning_rate": 2.5557809330628803e-06,
114880
+ "loss": 0.0,
114881
  "step": 19117
114882
  },
114883
  {
114884
+ "epoch": 4.03,
114885
+ "learning_rate": 2.5383946682121126e-06,
114886
+ "loss": 0.0001,
114887
  "step": 19118
114888
  },
114889
  {
114890
+ "epoch": 4.03,
114891
+ "learning_rate": 2.521008403361344e-06,
114892
+ "loss": 0.0,
114893
  "step": 19119
114894
  },
114895
  {
114896
+ "epoch": 4.03,
114897
+ "learning_rate": 2.5036221385105767e-06,
114898
+ "loss": 0.0001,
114899
  "step": 19120
114900
  },
114901
  {
114902
+ "epoch": 4.03,
114903
+ "learning_rate": 2.486235873659808e-06,
114904
+ "loss": 0.0,
114905
  "step": 19121
114906
  },
114907
  {
114908
+ "epoch": 4.03,
114909
+ "learning_rate": 2.4688496088090405e-06,
114910
+ "loss": 0.0001,
114911
  "step": 19122
114912
  },
114913
  {
114914
+ "epoch": 4.03,
114915
+ "learning_rate": 2.4514633439582724e-06,
114916
+ "loss": 0.0002,
114917
  "step": 19123
114918
  },
114919
  {
114920
+ "epoch": 4.03,
114921
+ "learning_rate": 2.4340770791075047e-06,
114922
+ "loss": 0.0001,
114923
  "step": 19124
114924
  },
114925
  {
114926
+ "epoch": 4.03,
114927
+ "learning_rate": 2.416690814256737e-06,
114928
+ "loss": 0.0012,
114929
  "step": 19125
114930
  },
114931
  {
114932
+ "epoch": 4.03,
114933
+ "learning_rate": 2.399304549405969e-06,
114934
+ "loss": 0.0001,
114935
  "step": 19126
114936
  },
114937
  {
114938
+ "epoch": 4.03,
114939
+ "learning_rate": 2.381918284555201e-06,
114940
  "loss": 0.0,
114941
  "step": 19127
114942
  },
114943
  {
114944
+ "epoch": 4.03,
114945
+ "learning_rate": 2.364532019704433e-06,
114946
+ "loss": 0.0001,
114947
  "step": 19128
114948
  },
114949
  {
114950
+ "epoch": 4.03,
114951
+ "learning_rate": 2.3471457548536653e-06,
114952
  "loss": 0.0,
114953
  "step": 19129
114954
  },
114955
  {
114956
+ "epoch": 4.03,
114957
+ "learning_rate": 2.3297594900028976e-06,
114958
+ "loss": 0.0002,
114959
  "step": 19130
114960
  },
114961
  {
114962
+ "epoch": 4.03,
114963
+ "learning_rate": 2.3123732251521295e-06,
114964
  "loss": 0.0001,
114965
  "step": 19131
114966
  },
114967
  {
114968
+ "epoch": 4.03,
114969
+ "learning_rate": 2.294986960301362e-06,
114970
+ "loss": 0.0,
114971
  "step": 19132
114972
  },
114973
  {
114974
+ "epoch": 4.03,
114975
+ "learning_rate": 2.2776006954505937e-06,
114976
+ "loss": 0.0002,
114977
  "step": 19133
114978
  },
114979
  {
114980
+ "epoch": 4.03,
114981
+ "learning_rate": 2.260214430599826e-06,
114982
  "loss": 0.0001,
114983
  "step": 19134
114984
  },
114985
  {
114986
+ "epoch": 4.04,
114987
+ "learning_rate": 2.2428281657490583e-06,
114988
+ "loss": 0.0001,
114989
  "step": 19135
114990
  },
114991
  {
114992
+ "epoch": 4.04,
114993
+ "learning_rate": 2.22544190089829e-06,
114994
  "loss": 0.0001,
114995
  "step": 19136
114996
  },
114997
  {
114998
+ "epoch": 4.04,
114999
+ "learning_rate": 2.2080556360475225e-06,
115000
+ "loss": 0.0002,
115001
  "step": 19137
115002
  },
115003
  {
115004
+ "epoch": 4.04,
115005
+ "learning_rate": 2.1906693711967543e-06,
115006
+ "loss": 0.0003,
115007
  "step": 19138
115008
  },
115009
  {
115010
+ "epoch": 4.04,
115011
+ "learning_rate": 2.173283106345986e-06,
115012
+ "loss": 0.0001,
115013
  "step": 19139
115014
  },
115015
  {
115016
+ "epoch": 4.04,
115017
+ "learning_rate": 2.1558968414952185e-06,
115018
  "loss": 0.0001,
115019
  "step": 19140
115020
  },
115021
  {
115022
+ "epoch": 4.04,
115023
+ "learning_rate": 2.138510576644451e-06,
115024
+ "loss": 0.0001,
115025
  "step": 19141
115026
  },
115027
  {
115028
+ "epoch": 4.04,
115029
+ "learning_rate": 2.1211243117936827e-06,
115030
+ "loss": 0.0002,
115031
  "step": 19142
115032
  },
115033
  {
115034
+ "epoch": 4.04,
115035
+ "learning_rate": 2.103738046942915e-06,
115036
+ "loss": 0.0002,
115037
  "step": 19143
115038
  },
115039
  {
115040
+ "epoch": 4.04,
115041
+ "learning_rate": 2.086351782092147e-06,
115042
+ "loss": 0.0001,
115043
  "step": 19144
115044
  },
115045
  {
115046
+ "epoch": 4.04,
115047
+ "learning_rate": 2.068965517241379e-06,
115048
  "loss": 0.0,
115049
  "step": 19145
115050
  },
115051
  {
115052
+ "epoch": 4.04,
115053
+ "learning_rate": 2.051579252390611e-06,
115054
+ "loss": 0.0001,
115055
  "step": 19146
115056
  },
115057
  {
115058
+ "epoch": 4.04,
115059
+ "learning_rate": 2.0341929875398433e-06,
115060
+ "loss": 0.0001,
115061
  "step": 19147
115062
  },
115063
  {
115064
+ "epoch": 4.04,
115065
+ "learning_rate": 2.0168067226890756e-06,
115066
+ "loss": 0.0002,
115067
  "step": 19148
115068
  },
115069
  {
115070
+ "epoch": 4.04,
115071
+ "learning_rate": 1.9994204578383075e-06,
115072
  "loss": 0.0001,
115073
  "step": 19149
115074
  },
115075
  {
115076
+ "epoch": 4.04,
115077
+ "learning_rate": 1.98203419298754e-06,
115078
+ "loss": 0.0001,
115079
  "step": 19150
115080
  },
115081
  {
115082
+ "epoch": 4.04,
115083
+ "learning_rate": 1.9646479281367717e-06,
115084
+ "loss": 0.0002,
115085
  "step": 19151
115086
  },
115087
  {
115088
+ "epoch": 4.04,
115089
+ "learning_rate": 1.947261663286004e-06,
115090
  "loss": 0.0001,
115091
  "step": 19152
115092
  },
115093
  {
115094
+ "epoch": 4.04,
115095
+ "learning_rate": 1.929875398435236e-06,
115096
+ "loss": 0.0001,
115097
  "step": 19153
115098
  },
115099
  {
115100
+ "epoch": 4.04,
115101
+ "learning_rate": 1.912489133584468e-06,
115102
+ "loss": 0.0001,
115103
  "step": 19154
115104
  },
115105
  {
115106
+ "epoch": 4.04,
115107
+ "learning_rate": 1.8951028687337003e-06,
115108
+ "loss": 0.0002,
115109
  "step": 19155
115110
  },
115111
  {
115112
+ "epoch": 4.04,
115113
+ "learning_rate": 1.8777166038829323e-06,
115114
  "loss": 0.0,
115115
  "step": 19156
115116
  },
115117
  {
115118
+ "epoch": 4.04,
115119
+ "learning_rate": 1.8603303390321644e-06,
115120
+ "loss": 0.0,
115121
  "step": 19157
115122
  },
115123
  {
115124
+ "epoch": 4.04,
115125
+ "learning_rate": 1.8429440741813965e-06,
115126
+ "loss": 0.0002,
115127
  "step": 19158
115128
  },
115129
  {
115130
+ "epoch": 4.04,
115131
+ "learning_rate": 1.8255578093306286e-06,
115132
+ "loss": 0.0001,
115133
  "step": 19159
115134
  },
115135
  {
115136
+ "epoch": 4.04,
115137
+ "learning_rate": 1.8081715444798607e-06,
115138
+ "loss": 0.0001,
115139
  "step": 19160
115140
  },
115141
  {
115142
+ "epoch": 4.04,
115143
+ "learning_rate": 1.7907852796290928e-06,
115144
+ "loss": 0.0001,
115145
  "step": 19161
115146
  },
115147
  {
115148
+ "epoch": 4.04,
115149
+ "learning_rate": 1.773399014778325e-06,
115150
  "loss": 0.0001,
115151
  "step": 19162
115152
  },
115153
  {
115154
+ "epoch": 4.04,
115155
+ "learning_rate": 1.7560127499275572e-06,
115156
+ "loss": 0.0,
115157
  "step": 19163
115158
  },
115159
  {
115160
+ "epoch": 4.04,
115161
+ "learning_rate": 1.7386264850767893e-06,
115162
+ "loss": 0.0,
115163
  "step": 19164
115164
  },
115165
  {
115166
+ "epoch": 4.04,
115167
+ "learning_rate": 1.7212402202260213e-06,
115168
  "loss": 0.0,
115169
  "step": 19165
115170
  },
115171
  {
115172
+ "epoch": 4.04,
115173
+ "learning_rate": 1.7038539553752534e-06,
115174
  "loss": 0.0001,
115175
  "step": 19166
115176
  },
115177
  {
115178
+ "epoch": 4.04,
115179
+ "learning_rate": 1.6864676905244855e-06,
115180
+ "loss": 0.0003,
115181
  "step": 19167
115182
  },
115183
  {
115184
+ "epoch": 4.04,
115185
+ "learning_rate": 1.6690814256737176e-06,
115186
  "loss": 0.0,
115187
  "step": 19168
115188
  },
115189
  {
115190
+ "epoch": 4.04,
115191
+ "learning_rate": 1.65169516082295e-06,
115192
+ "loss": 0.0,
115193
  "step": 19169
115194
  },
115195
  {
115196
+ "epoch": 4.04,
115197
+ "learning_rate": 1.634308895972182e-06,
115198
+ "loss": 0.0002,
115199
  "step": 19170
115200
  },
115201
  {
115202
+ "epoch": 4.04,
115203
+ "learning_rate": 1.616922631121414e-06,
115204
+ "loss": 0.0,
115205
  "step": 19171
115206
  },
115207
  {
115208
+ "epoch": 4.04,
115209
+ "learning_rate": 1.599536366270646e-06,
115210
+ "loss": 0.0001,
115211
  "step": 19172
115212
  },
115213
  {
115214
+ "epoch": 4.04,
115215
+ "learning_rate": 1.582150101419878e-06,
115216
+ "loss": 0.0001,
115217
  "step": 19173
115218
  },
115219
  {
115220
+ "epoch": 4.05,
115221
+ "learning_rate": 1.5647638365691101e-06,
115222
+ "loss": 0.0001,
115223
  "step": 19174
115224
  },
115225
  {
115226
+ "epoch": 4.05,
115227
+ "learning_rate": 1.5473775717183422e-06,
115228
+ "loss": 0.0002,
115229
  "step": 19175
115230
  },
115231
  {
115232
+ "epoch": 4.05,
115233
+ "learning_rate": 1.5299913068675745e-06,
115234
+ "loss": 0.0,
115235
  "step": 19176
115236
  },
115237
  {
115238
+ "epoch": 4.05,
115239
+ "learning_rate": 1.5126050420168066e-06,
115240
+ "loss": 0.0,
115241
  "step": 19177
115242
  },
115243
  {
115244
+ "epoch": 4.05,
115245
+ "learning_rate": 1.4952187771660387e-06,
115246
+ "loss": 0.0002,
115247
  "step": 19178
115248
  },
115249
  {
115250
+ "epoch": 4.05,
115251
+ "learning_rate": 1.4778325123152708e-06,
115252
  "loss": 0.0001,
115253
  "step": 19179
115254
  },
115255
  {
115256
+ "epoch": 4.05,
115257
+ "learning_rate": 1.4604462474645029e-06,
115258
  "loss": 0.0,
115259
  "step": 19180
115260
  },
115261
  {
115262
+ "epoch": 4.05,
115263
+ "learning_rate": 1.443059982613735e-06,
115264
  "loss": 0.0001,
115265
  "step": 19181
115266
  },
115267
  {
115268
+ "epoch": 4.05,
115269
+ "learning_rate": 1.425673717762967e-06,
115270
+ "loss": 0.0,
115271
  "step": 19182
115272
  },
115273
  {
115274
+ "epoch": 4.05,
115275
+ "learning_rate": 1.4082874529121994e-06,
115276
+ "loss": 0.0003,
115277
  "step": 19183
115278
  },
115279
  {
115280
+ "epoch": 4.05,
115281
+ "learning_rate": 1.3909011880614315e-06,
115282
+ "loss": 0.0001,
115283
  "step": 19184
115284
  },
115285
  {
115286
+ "epoch": 4.05,
115287
+ "learning_rate": 1.3735149232106635e-06,
115288
+ "loss": 0.0,
115289
  "step": 19185
115290
  },
115291
  {
115292
+ "epoch": 4.05,
115293
+ "learning_rate": 1.3561286583598956e-06,
115294
  "loss": 0.0,
115295
  "step": 19186
115296
  },
115297
  {
115298
+ "epoch": 4.05,
115299
+ "learning_rate": 1.3387423935091277e-06,
115300
  "loss": 0.0,
115301
  "step": 19187
115302
  },
115303
  {
115304
+ "epoch": 4.05,
115305
+ "learning_rate": 1.3213561286583598e-06,
115306
+ "loss": 0.0,
115307
  "step": 19188
115308
  },
115309
  {
115310
+ "epoch": 4.05,
115311
+ "learning_rate": 1.3039698638075919e-06,
115312
  "loss": 0.0001,
115313
  "step": 19189
115314
  },
115315
  {
115316
+ "epoch": 4.05,
115317
+ "learning_rate": 1.2865835989568238e-06,
115318
+ "loss": 0.0,
115319
  "step": 19190
115320
  },
115321
  {
115322
+ "epoch": 4.05,
115323
+ "learning_rate": 1.2691973341060563e-06,
115324
+ "loss": 0.0001,
115325
  "step": 19191
115326
  },
115327
  {
115328
+ "epoch": 4.05,
115329
+ "learning_rate": 1.2518110692552884e-06,
115330
+ "loss": 0.0001,
115331
  "step": 19192
115332
  },
115333
  {
115334
+ "epoch": 4.05,
115335
+ "learning_rate": 1.2344248044045202e-06,
115336
+ "loss": 0.0,
115337
  "step": 19193
115338
  },
115339
  {
115340
+ "epoch": 4.05,
115341
+ "learning_rate": 1.2170385395537523e-06,
115342
  "loss": 0.0001,
115343
  "step": 19194
115344
  },
115345
  {
115346
+ "epoch": 4.05,
115347
+ "learning_rate": 1.1996522747029844e-06,
115348
+ "loss": 0.0001,
115349
  "step": 19195
115350
  },
115351
  {
115352
+ "epoch": 4.05,
115353
+ "learning_rate": 1.1822660098522165e-06,
115354
+ "loss": 0.0,
115355
  "step": 19196
115356
  },
115357
  {
115358
+ "epoch": 4.05,
115359
+ "learning_rate": 1.1648797450014488e-06,
115360
+ "loss": 0.0001,
115361
  "step": 19197
115362
  },
115363
  {
115364
+ "epoch": 4.05,
115365
+ "learning_rate": 1.147493480150681e-06,
115366
+ "loss": 0.0002,
115367
  "step": 19198
115368
  },
115369
  {
115370
+ "epoch": 4.05,
115371
+ "learning_rate": 1.130107215299913e-06,
115372
+ "loss": 0.0002,
115373
  "step": 19199
115374
  },
115375
  {
115376
+ "epoch": 4.05,
115377
+ "learning_rate": 1.112720950449145e-06,
115378
+ "loss": 0.0001,
115379
  "step": 19200
115380
  },
115381
  {
115382
+ "epoch": 4.05,
115383
+ "learning_rate": 1.0953346855983772e-06,
115384
+ "loss": 0.0001,
115385
  "step": 19201
115386
  },
115387
  {
115388
+ "epoch": 4.05,
115389
+ "learning_rate": 1.0779484207476093e-06,
115390
+ "loss": 0.0,
115391
  "step": 19202
115392
  },
115393
  {
115394
+ "epoch": 4.05,
115395
+ "learning_rate": 1.0605621558968413e-06,
115396
+ "loss": 0.0,
115397
  "step": 19203
115398
  },
115399
  {
115400
+ "epoch": 4.05,
115401
+ "learning_rate": 1.0431758910460734e-06,
115402
+ "loss": 0.0,
115403
  "step": 19204
115404
  },
115405
  {
115406
+ "epoch": 4.05,
115407
+ "learning_rate": 1.0257896261953055e-06,
115408
  "loss": 0.0,
115409
  "step": 19205
115410
  },
115411
  {
115412
+ "epoch": 4.05,
115413
+ "learning_rate": 1.0084033613445378e-06,
115414
+ "loss": 0.0,
115415
  "step": 19206
115416
  },
115417
  {
115418
+ "epoch": 4.05,
115419
+ "learning_rate": 9.9101709649377e-07,
115420
+ "loss": 0.0,
115421
  "step": 19207
115422
  },
115423
  {
115424
+ "epoch": 4.05,
115425
+ "learning_rate": 9.73630831643002e-07,
115426
+ "loss": 0.0002,
115427
  "step": 19208
115428
  },
115429
  {
115430
+ "epoch": 4.05,
115431
+ "learning_rate": 9.56244566792234e-07,
115432
+ "loss": 0.0,
115433
  "step": 19209
115434
  },
115435
  {
115436
+ "epoch": 4.05,
115437
+ "learning_rate": 9.388583019414662e-07,
115438
  "loss": 0.0001,
115439
  "step": 19210
115440
  },
115441
  {
115442
+ "epoch": 4.05,
115443
+ "learning_rate": 9.214720370906983e-07,
115444
  "loss": 0.0001,
115445
  "step": 19211
115446
  },
115447
  {
115448
+ "epoch": 4.06,
115449
+ "learning_rate": 9.040857722399303e-07,
115450
  "loss": 0.0001,
115451
  "step": 19212
115452
  },
115453
  {
115454
+ "epoch": 4.06,
115455
+ "learning_rate": 8.866995073891625e-07,
115456
  "loss": 0.0001,
115457
  "step": 19213
115458
  },
115459
  {
115460
+ "epoch": 4.06,
115461
+ "learning_rate": 8.693132425383946e-07,
115462
+ "loss": 0.0002,
115463
  "step": 19214
115464
  },
115465
  {
115466
+ "epoch": 4.06,
115467
+ "learning_rate": 8.519269776876267e-07,
115468
+ "loss": 0.0001,
115469
  "step": 19215
115470
  },
115471
  {
115472
+ "epoch": 4.06,
115473
+ "learning_rate": 8.345407128368588e-07,
115474
+ "loss": 0.0,
115475
  "step": 19216
115476
  },
115477
  {
115478
+ "epoch": 4.06,
115479
+ "learning_rate": 8.17154447986091e-07,
115480
  "loss": 0.0001,
115481
  "step": 19217
115482
  },
115483
  {
115484
+ "epoch": 4.06,
115485
+ "learning_rate": 7.99768183135323e-07,
115486
+ "loss": 0.0,
115487
  "step": 19218
115488
  },
115489
  {
115490
+ "epoch": 4.06,
115491
+ "learning_rate": 7.823819182845551e-07,
115492
+ "loss": 0.0,
115493
  "step": 19219
115494
  },
115495
  {
115496
+ "epoch": 4.06,
115497
+ "learning_rate": 7.649956534337873e-07,
115498
  "loss": 0.0,
115499
  "step": 19220
115500
  },
115501
  {
115502
+ "epoch": 4.06,
115503
+ "learning_rate": 7.476093885830194e-07,
115504
+ "loss": 0.0001,
115505
  "step": 19221
115506
  },
115507
  {
115508
+ "epoch": 4.06,
115509
+ "learning_rate": 7.302231237322514e-07,
115510
+ "loss": 0.0,
115511
  "step": 19222
115512
  },
115513
  {
115514
+ "epoch": 4.06,
115515
+ "learning_rate": 7.128368588814835e-07,
115516
+ "loss": 0.0,
115517
  "step": 19223
115518
  },
115519
  {
115520
+ "epoch": 4.06,
115521
+ "learning_rate": 6.954505940307157e-07,
115522
+ "loss": 0.0,
115523
  "step": 19224
115524
  },
115525
  {
115526
+ "epoch": 4.06,
115527
+ "learning_rate": 6.780643291799478e-07,
115528
+ "loss": 0.0002,
115529
  "step": 19225
115530
  },
115531
  {
115532
+ "epoch": 4.06,
115533
+ "learning_rate": 6.606780643291799e-07,
115534
+ "loss": 0.0,
115535
  "step": 19226
115536
  },
115537
  {
115538
+ "epoch": 4.06,
115539
+ "learning_rate": 6.432917994784119e-07,
115540
+ "loss": 0.0,
115541
  "step": 19227
115542
  },
115543
  {
115544
+ "epoch": 4.06,
115545
+ "learning_rate": 6.259055346276442e-07,
115546
+ "loss": 0.0001,
115547
  "step": 19228
115548
  },
115549
  {
115550
+ "epoch": 4.06,
115551
+ "learning_rate": 6.085192697768762e-07,
115552
  "loss": 0.0,
115553
  "step": 19229
115554
  },
115555
  {
115556
+ "epoch": 4.06,
115557
+ "learning_rate": 5.911330049261083e-07,
115558
  "loss": 0.0001,
115559
  "step": 19230
115560
  },
115561
  {
115562
+ "epoch": 4.06,
115563
+ "learning_rate": 5.737467400753404e-07,
115564
+ "loss": 0.0,
115565
  "step": 19231
115566
  },
115567
  {
115568
+ "epoch": 4.06,
115569
+ "learning_rate": 5.563604752245725e-07,
115570
+ "loss": 0.0003,
115571
  "step": 19232
115572
  },
115573
  {
115574
+ "epoch": 4.06,
115575
+ "learning_rate": 5.389742103738046e-07,
115576
+ "loss": 0.0001,
115577
  "step": 19233
115578
  },
115579
  {
115580
+ "epoch": 4.06,
115581
+ "learning_rate": 5.215879455230367e-07,
115582
  "loss": 0.0001,
115583
  "step": 19234
115584
  },
115585
  {
115586
+ "epoch": 4.06,
115587
+ "learning_rate": 5.042016806722689e-07,
115588
  "loss": 0.0001,
115589
  "step": 19235
115590
  },
115591
  {
115592
+ "epoch": 4.06,
115593
+ "learning_rate": 4.86815415821501e-07,
115594
+ "loss": 0.0001,
115595
  "step": 19236
115596
  },
115597
  {
115598
+ "epoch": 4.06,
115599
+ "learning_rate": 4.694291509707331e-07,
115600
+ "loss": 0.0,
115601
  "step": 19237
115602
  },
115603
  {
115604
+ "epoch": 4.06,
115605
+ "learning_rate": 4.520428861199652e-07,
115606
  "loss": 0.0001,
115607
  "step": 19238
115608
  },
115609
  {
115610
+ "epoch": 4.06,
115611
+ "learning_rate": 4.346566212691973e-07,
115612
  "loss": 0.0001,
115613
  "step": 19239
115614
  },
115615
  {
115616
+ "epoch": 4.06,
115617
+ "learning_rate": 4.172703564184294e-07,
115618
  "loss": 0.0,
115619
  "step": 19240
115620
  },
115621
  {
115622
+ "epoch": 4.06,
115623
+ "learning_rate": 3.998840915676615e-07,
115624
+ "loss": 0.0001,
115625
  "step": 19241
115626
  },
115627
  {
115628
+ "epoch": 4.06,
115629
+ "learning_rate": 3.8249782671689363e-07,
115630
  "loss": 0.0,
115631
  "step": 19242
115632
  },
115633
  {
115634
+ "epoch": 4.06,
115635
+ "learning_rate": 3.651115618661257e-07,
115636
+ "loss": 0.0001,
115637
  "step": 19243
115638
  },
115639
  {
115640
+ "epoch": 4.06,
115641
+ "learning_rate": 3.4772529701535786e-07,
115642
+ "loss": 0.0,
115643
  "step": 19244
115644
  },
115645
  {
115646
+ "epoch": 4.06,
115647
+ "learning_rate": 3.3033903216458995e-07,
115648
+ "loss": 0.0,
115649
  "step": 19245
115650
  },
115651
  {
115652
+ "epoch": 4.06,
115653
+ "learning_rate": 3.129527673138221e-07,
115654
+ "loss": 0.0002,
115655
  "step": 19246
115656
  },
115657
  {
115658
+ "epoch": 4.06,
115659
+ "learning_rate": 2.9556650246305413e-07,
115660
+ "loss": 0.0,
115661
  "step": 19247
115662
  },
115663
  {
115664
+ "epoch": 4.06,
115665
+ "learning_rate": 2.7818023761228627e-07,
115666
  "loss": 0.0001,
115667
  "step": 19248
115668
  },
115669
  {
115670
+ "epoch": 4.06,
115671
+ "learning_rate": 2.6079397276151836e-07,
115672
+ "loss": 0.0002,
115673
  "step": 19249
115674
  },
115675
  {
115676
+ "epoch": 4.06,
115677
+ "learning_rate": 2.434077079107505e-07,
115678
+ "loss": 0.0015,
115679
  "step": 19250
115680
  },
115681
  {
115682
+ "epoch": 4.07,
115683
+ "learning_rate": 2.260214430599826e-07,
115684
+ "loss": 0.0,
115685
+ "step": 19251
115686
+ },
115687
+ {
115688
+ "epoch": 4.07,
115689
+ "learning_rate": 2.086351782092147e-07,
115690
+ "loss": 0.0001,
115691
+ "step": 19252
115692
+ },
115693
+ {
115694
+ "epoch": 4.07,
115695
+ "learning_rate": 1.9124891335844682e-07,
115696
+ "loss": 0.0,
115697
+ "step": 19253
115698
+ },
115699
+ {
115700
+ "epoch": 4.07,
115701
+ "learning_rate": 1.7386264850767893e-07,
115702
+ "loss": 0.0002,
115703
+ "step": 19254
115704
+ },
115705
+ {
115706
+ "epoch": 4.07,
115707
+ "learning_rate": 1.5647638365691105e-07,
115708
+ "loss": 0.0001,
115709
+ "step": 19255
115710
+ },
115711
+ {
115712
+ "epoch": 4.07,
115713
+ "step": 19255,
115714
+ "total_flos": 4.709738606122584e+20,
115715
+ "train_loss": 5.854813561474184e-06,
115716
+ "train_runtime": 574.8329,
115717
+ "train_samples_per_second": 2143.301,
115718
+ "train_steps_per_second": 33.497
115719
  }
115720
  ],
115721
+ "max_steps": 19255,
115722
  "num_train_epochs": 5,
115723
+ "total_flos": 4.709738606122584e+20,
115724
  "trial_name": null,
115725
  "trial_params": null
115726
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad89e76b0670cd2e61d285d1c083b63650b30ff0824b129c7770656484dc7c02
3
  size 3247
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46a8db1eb3abf552e505c4d382ce37db4dc5d44734d8cd766198dc27b5c70d61
3
  size 3247