CreatorPhan committed on
Commit
cd45751
1 Parent(s): 28812b3

Upload folder using huggingface_hub (#6)


- Upload folder using huggingface_hub (9c5d0e29fdd380a82a1705ff8663c721c0a396a3)
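The folder was pushed with the huggingface_hub client. A minimal sketch of the kind of call that produces such a commit, assuming a logged-in client; the repo id and local folder path are placeholders, not values taken from this repository:

from huggingface_hub import HfApi

api = HfApi()  # picks up the token saved by `huggingface-cli login`
api.upload_folder(
    folder_path="./output/checkpoint-900",   # placeholder: local folder to push
    repo_id="your-username/your-model",      # placeholder: target repo on the Hub
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)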

Files changed (5)
  1. adapter_model.bin +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +1203 -3
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:83a2bd54ab68a3dcad8fe0e5a630e45f5342750c381fd29ab4100c5db3d4e0e9
+ oid sha256:ed6e90196f5a274b5d5ff0f18e648b6396fc99189dc82111fcfd2e83656a72f7
  size 39409357
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:81ef498b8b8b31a2a5c36ffe3529e1378029eb4c3e4ba4770de0c248e4c62950
+ oid sha256:2fe640e1cc583a2fcc1f7be360da696c6de30f28f00d5f64bbb3f586eab33160
  size 78844421
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:36fc71bd44bd7f04f2599c5dface64c517de1a7ab7bac3600f3f6470c6c72673
+ oid sha256:ab62043be50b93d4eb28964be2d945176db3d64fe73ddd052a7656ba9141c683
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c6badaa6737fa9306a1f25b19c54f91672e90aa94d505b95ab467e7d08dd541
+ oid sha256:7b34852b759fa1b56e14c21698a89fa4737f1160151f73515f2ad9b2b1334acd
  size 627
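Each file above is tracked with Git LFS, so the diff only touches the three-line pointer (spec version, sha256 oid, byte size); the actual checkpoint bytes live in LFS storage. A minimal sketch, assuming one of the binaries has already been downloaded locally (the path is a placeholder), of checking that its SHA-256 matches the oid in the new pointer:

import hashlib

path = "adapter_model.bin"  # placeholder: local copy of the downloaded file
expected_oid = "ed6e90196f5a274b5d5ff0f18e648b6396fc99189dc82111fcfd2e83656a72f7"

sha = hashlib.sha256()
with open(path, "rb") as f:
    # Hash in 1 MiB chunks so large checkpoints never need to fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print(sha.hexdigest() == expected_oid)  # True if the local file matches the pointer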
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 7.908208296557811,
+ "epoch": 10.167696381288614,
  "eval_steps": 500,
- "global_step": 700,
+ "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4207,13 +4207,1213 @@
  "learning_rate": 0.00010056818181818181,
  "loss": 0.6347,
  "step": 700
4210
+ },
4211
+ {
4212
+ "epoch": 7.92,
4213
+ "learning_rate": 0.00010042613636363636,
4214
+ "loss": 0.6857,
4215
+ "step": 701
4216
+ },
4217
+ {
4218
+ "epoch": 7.93,
4219
+ "learning_rate": 0.00010028409090909093,
4220
+ "loss": 0.6677,
4221
+ "step": 702
4222
+ },
4223
+ {
4224
+ "epoch": 7.94,
4225
+ "learning_rate": 0.00010014204545454546,
4226
+ "loss": 0.6697,
4227
+ "step": 703
4228
+ },
4229
+ {
4230
+ "epoch": 7.95,
4231
+ "learning_rate": 0.0001,
4232
+ "loss": 0.6375,
4233
+ "step": 704
4234
+ },
4235
+ {
4236
+ "epoch": 7.96,
4237
+ "learning_rate": 9.985795454545455e-05,
4238
+ "loss": 0.6572,
4239
+ "step": 705
4240
+ },
4241
+ {
4242
+ "epoch": 7.98,
4243
+ "learning_rate": 9.97159090909091e-05,
4244
+ "loss": 0.668,
4245
+ "step": 706
4246
+ },
4247
+ {
4248
+ "epoch": 7.99,
4249
+ "learning_rate": 9.957386363636364e-05,
4250
+ "loss": 0.6797,
4251
+ "step": 707
4252
+ },
4253
+ {
4254
+ "epoch": 8.0,
4255
+ "learning_rate": 9.943181818181819e-05,
4256
+ "loss": 0.6784,
4257
+ "step": 708
4258
+ },
4259
+ {
4260
+ "epoch": 8.01,
4261
+ "learning_rate": 9.928977272727273e-05,
4262
+ "loss": 0.6192,
4263
+ "step": 709
4264
+ },
4265
+ {
4266
+ "epoch": 8.02,
4267
+ "learning_rate": 9.914772727272728e-05,
4268
+ "loss": 0.6287,
4269
+ "step": 710
4270
+ },
4271
+ {
4272
+ "epoch": 8.03,
4273
+ "learning_rate": 9.900568181818183e-05,
4274
+ "loss": 0.6034,
4275
+ "step": 711
4276
+ },
4277
+ {
4278
+ "epoch": 8.04,
4279
+ "learning_rate": 9.886363636363637e-05,
4280
+ "loss": 0.6167,
4281
+ "step": 712
4282
+ },
4283
+ {
4284
+ "epoch": 8.06,
4285
+ "learning_rate": 9.872159090909091e-05,
4286
+ "loss": 0.6353,
4287
+ "step": 713
4288
+ },
4289
+ {
4290
+ "epoch": 8.07,
4291
+ "learning_rate": 9.857954545454547e-05,
4292
+ "loss": 0.6222,
4293
+ "step": 714
4294
+ },
4295
+ {
4296
+ "epoch": 8.08,
4297
+ "learning_rate": 9.84375e-05,
4298
+ "loss": 0.5963,
4299
+ "step": 715
4300
+ },
4301
+ {
4302
+ "epoch": 8.09,
4303
+ "learning_rate": 9.829545454545455e-05,
4304
+ "loss": 0.6042,
4305
+ "step": 716
4306
+ },
4307
+ {
4308
+ "epoch": 8.1,
4309
+ "learning_rate": 9.81534090909091e-05,
4310
+ "loss": 0.612,
4311
+ "step": 717
4312
+ },
4313
+ {
4314
+ "epoch": 8.11,
4315
+ "learning_rate": 9.801136363636364e-05,
4316
+ "loss": 0.6069,
4317
+ "step": 718
4318
+ },
4319
+ {
4320
+ "epoch": 8.12,
4321
+ "learning_rate": 9.786931818181818e-05,
4322
+ "loss": 0.6001,
4323
+ "step": 719
4324
+ },
4325
+ {
4326
+ "epoch": 8.13,
4327
+ "learning_rate": 9.772727272727274e-05,
4328
+ "loss": 0.6007,
4329
+ "step": 720
4330
+ },
4331
+ {
4332
+ "epoch": 8.15,
4333
+ "learning_rate": 9.758522727272727e-05,
4334
+ "loss": 0.6079,
4335
+ "step": 721
4336
+ },
4337
+ {
4338
+ "epoch": 8.16,
4339
+ "learning_rate": 9.744318181818183e-05,
4340
+ "loss": 0.6216,
4341
+ "step": 722
4342
+ },
4343
+ {
4344
+ "epoch": 8.17,
4345
+ "learning_rate": 9.730113636363637e-05,
4346
+ "loss": 0.6321,
4347
+ "step": 723
4348
+ },
4349
+ {
4350
+ "epoch": 8.18,
4351
+ "learning_rate": 9.71590909090909e-05,
4352
+ "loss": 0.6044,
4353
+ "step": 724
4354
+ },
4355
+ {
4356
+ "epoch": 8.19,
4357
+ "learning_rate": 9.701704545454547e-05,
4358
+ "loss": 0.6028,
4359
+ "step": 725
4360
+ },
4361
+ {
4362
+ "epoch": 8.2,
4363
+ "learning_rate": 9.687500000000001e-05,
4364
+ "loss": 0.6098,
4365
+ "step": 726
4366
+ },
4367
+ {
4368
+ "epoch": 8.21,
4369
+ "learning_rate": 9.673295454545454e-05,
4370
+ "loss": 0.6032,
4371
+ "step": 727
4372
+ },
4373
+ {
4374
+ "epoch": 8.22,
4375
+ "learning_rate": 9.65909090909091e-05,
4376
+ "loss": 0.6298,
4377
+ "step": 728
4378
+ },
4379
+ {
4380
+ "epoch": 8.24,
4381
+ "learning_rate": 9.644886363636365e-05,
4382
+ "loss": 0.6115,
4383
+ "step": 729
4384
+ },
4385
+ {
4386
+ "epoch": 8.25,
4387
+ "learning_rate": 9.630681818181818e-05,
4388
+ "loss": 0.6052,
4389
+ "step": 730
4390
+ },
4391
+ {
4392
+ "epoch": 8.26,
4393
+ "learning_rate": 9.616477272727274e-05,
4394
+ "loss": 0.6097,
4395
+ "step": 731
4396
+ },
4397
+ {
4398
+ "epoch": 8.27,
4399
+ "learning_rate": 9.602272727272728e-05,
4400
+ "loss": 0.6062,
4401
+ "step": 732
4402
+ },
4403
+ {
4404
+ "epoch": 8.28,
4405
+ "learning_rate": 9.588068181818183e-05,
4406
+ "loss": 0.5984,
4407
+ "step": 733
4408
+ },
4409
+ {
4410
+ "epoch": 8.29,
4411
+ "learning_rate": 9.573863636363637e-05,
4412
+ "loss": 0.6432,
4413
+ "step": 734
4414
+ },
4415
+ {
4416
+ "epoch": 8.3,
4417
+ "learning_rate": 9.559659090909092e-05,
4418
+ "loss": 0.5814,
4419
+ "step": 735
4420
+ },
4421
+ {
4422
+ "epoch": 8.31,
4423
+ "learning_rate": 9.545454545454546e-05,
4424
+ "loss": 0.5965,
4425
+ "step": 736
4426
+ },
4427
+ {
4428
+ "epoch": 8.33,
4429
+ "learning_rate": 9.53125e-05,
4430
+ "loss": 0.6102,
4431
+ "step": 737
4432
+ },
4433
+ {
4434
+ "epoch": 8.34,
4435
+ "learning_rate": 9.517045454545455e-05,
4436
+ "loss": 0.5849,
4437
+ "step": 738
4438
+ },
4439
+ {
4440
+ "epoch": 8.35,
4441
+ "learning_rate": 9.50284090909091e-05,
4442
+ "loss": 0.6062,
4443
+ "step": 739
4444
+ },
4445
+ {
4446
+ "epoch": 8.36,
4447
+ "learning_rate": 9.488636363636364e-05,
4448
+ "loss": 0.6031,
4449
+ "step": 740
4450
+ },
4451
+ {
4452
+ "epoch": 8.37,
4453
+ "learning_rate": 9.474431818181819e-05,
4454
+ "loss": 0.5932,
4455
+ "step": 741
4456
+ },
4457
+ {
4458
+ "epoch": 8.38,
4459
+ "learning_rate": 9.460227272727273e-05,
4460
+ "loss": 0.589,
4461
+ "step": 742
4462
+ },
4463
+ {
4464
+ "epoch": 8.39,
4465
+ "learning_rate": 9.446022727272728e-05,
4466
+ "loss": 0.6096,
4467
+ "step": 743
4468
+ },
4469
+ {
4470
+ "epoch": 8.41,
4471
+ "learning_rate": 9.431818181818182e-05,
4472
+ "loss": 0.601,
4473
+ "step": 744
4474
+ },
4475
+ {
4476
+ "epoch": 8.42,
4477
+ "learning_rate": 9.417613636363637e-05,
4478
+ "loss": 0.5798,
4479
+ "step": 745
4480
+ },
4481
+ {
4482
+ "epoch": 8.43,
4483
+ "learning_rate": 9.403409090909091e-05,
4484
+ "loss": 0.59,
4485
+ "step": 746
4486
+ },
4487
+ {
4488
+ "epoch": 8.44,
4489
+ "learning_rate": 9.389204545454546e-05,
4490
+ "loss": 0.5988,
4491
+ "step": 747
4492
+ },
4493
+ {
4494
+ "epoch": 8.45,
4495
+ "learning_rate": 9.375e-05,
4496
+ "loss": 0.5591,
4497
+ "step": 748
4498
+ },
4499
+ {
4500
+ "epoch": 8.46,
4501
+ "learning_rate": 9.360795454545455e-05,
4502
+ "loss": 0.5939,
4503
+ "step": 749
4504
+ },
4505
+ {
4506
+ "epoch": 8.47,
4507
+ "learning_rate": 9.346590909090909e-05,
4508
+ "loss": 0.5886,
4509
+ "step": 750
4510
+ },
4511
+ {
4512
+ "epoch": 8.48,
4513
+ "learning_rate": 9.332386363636364e-05,
4514
+ "loss": 0.5994,
4515
+ "step": 751
4516
+ },
4517
+ {
4518
+ "epoch": 8.5,
4519
+ "learning_rate": 9.318181818181818e-05,
4520
+ "loss": 0.5821,
4521
+ "step": 752
4522
+ },
4523
+ {
4524
+ "epoch": 8.51,
4525
+ "learning_rate": 9.303977272727273e-05,
4526
+ "loss": 0.602,
4527
+ "step": 753
4528
+ },
4529
+ {
4530
+ "epoch": 8.52,
4531
+ "learning_rate": 9.289772727272727e-05,
4532
+ "loss": 0.5708,
4533
+ "step": 754
4534
+ },
4535
+ {
4536
+ "epoch": 8.53,
4537
+ "learning_rate": 9.275568181818183e-05,
4538
+ "loss": 0.5902,
4539
+ "step": 755
4540
+ },
4541
+ {
4542
+ "epoch": 8.54,
4543
+ "learning_rate": 9.261363636363636e-05,
4544
+ "loss": 0.6053,
4545
+ "step": 756
4546
+ },
4547
+ {
4548
+ "epoch": 8.55,
4549
+ "learning_rate": 9.247159090909091e-05,
4550
+ "loss": 0.5797,
4551
+ "step": 757
4552
+ },
4553
+ {
4554
+ "epoch": 8.56,
4555
+ "learning_rate": 9.232954545454547e-05,
4556
+ "loss": 0.5965,
4557
+ "step": 758
4558
+ },
4559
+ {
4560
+ "epoch": 8.57,
4561
+ "learning_rate": 9.21875e-05,
4562
+ "loss": 0.5738,
4563
+ "step": 759
4564
+ },
4565
+ {
4566
+ "epoch": 8.59,
4567
+ "learning_rate": 9.204545454545454e-05,
4568
+ "loss": 0.5819,
4569
+ "step": 760
4570
+ },
4571
+ {
4572
+ "epoch": 8.6,
4573
+ "learning_rate": 9.19034090909091e-05,
4574
+ "loss": 0.5994,
4575
+ "step": 761
4576
+ },
4577
+ {
4578
+ "epoch": 8.61,
4579
+ "learning_rate": 9.176136363636363e-05,
4580
+ "loss": 0.5738,
4581
+ "step": 762
4582
+ },
4583
+ {
4584
+ "epoch": 8.62,
4585
+ "learning_rate": 9.161931818181818e-05,
4586
+ "loss": 0.5663,
4587
+ "step": 763
4588
+ },
4589
+ {
4590
+ "epoch": 8.63,
4591
+ "learning_rate": 9.147727272727274e-05,
4592
+ "loss": 0.5798,
4593
+ "step": 764
4594
+ },
4595
+ {
4596
+ "epoch": 8.64,
4597
+ "learning_rate": 9.133522727272727e-05,
4598
+ "loss": 0.5705,
4599
+ "step": 765
4600
+ },
4601
+ {
4602
+ "epoch": 8.65,
4603
+ "learning_rate": 9.119318181818183e-05,
4604
+ "loss": 0.5943,
4605
+ "step": 766
4606
+ },
4607
+ {
4608
+ "epoch": 8.67,
4609
+ "learning_rate": 9.105113636363637e-05,
4610
+ "loss": 0.6019,
4611
+ "step": 767
4612
+ },
4613
+ {
4614
+ "epoch": 8.68,
4615
+ "learning_rate": 9.090909090909092e-05,
4616
+ "loss": 0.5733,
4617
+ "step": 768
4618
+ },
4619
+ {
4620
+ "epoch": 8.69,
4621
+ "learning_rate": 9.076704545454546e-05,
4622
+ "loss": 0.575,
4623
+ "step": 769
4624
+ },
4625
+ {
4626
+ "epoch": 8.7,
4627
+ "learning_rate": 9.062500000000001e-05,
4628
+ "loss": 0.5675,
4629
+ "step": 770
4630
+ },
4631
+ {
4632
+ "epoch": 8.71,
4633
+ "learning_rate": 9.048295454545455e-05,
4634
+ "loss": 0.566,
4635
+ "step": 771
4636
+ },
4637
+ {
4638
+ "epoch": 8.72,
4639
+ "learning_rate": 9.03409090909091e-05,
4640
+ "loss": 0.5513,
4641
+ "step": 772
4642
+ },
4643
+ {
4644
+ "epoch": 8.73,
4645
+ "learning_rate": 9.019886363636364e-05,
4646
+ "loss": 0.5682,
4647
+ "step": 773
4648
+ },
4649
+ {
4650
+ "epoch": 8.74,
4651
+ "learning_rate": 9.005681818181819e-05,
4652
+ "loss": 0.5508,
4653
+ "step": 774
4654
+ },
4655
+ {
4656
+ "epoch": 8.76,
4657
+ "learning_rate": 8.991477272727273e-05,
4658
+ "loss": 0.5668,
4659
+ "step": 775
4660
+ },
4661
+ {
4662
+ "epoch": 8.77,
4663
+ "learning_rate": 8.977272727272728e-05,
4664
+ "loss": 0.569,
4665
+ "step": 776
4666
+ },
4667
+ {
4668
+ "epoch": 8.78,
4669
+ "learning_rate": 8.963068181818182e-05,
4670
+ "loss": 0.5897,
4671
+ "step": 777
4672
+ },
4673
+ {
4674
+ "epoch": 8.79,
4675
+ "learning_rate": 8.948863636363637e-05,
4676
+ "loss": 0.5738,
4677
+ "step": 778
4678
+ },
4679
+ {
4680
+ "epoch": 8.8,
4681
+ "learning_rate": 8.934659090909091e-05,
4682
+ "loss": 0.5511,
4683
+ "step": 779
4684
+ },
4685
+ {
4686
+ "epoch": 8.81,
4687
+ "learning_rate": 8.920454545454546e-05,
4688
+ "loss": 0.5659,
4689
+ "step": 780
4690
+ },
4691
+ {
4692
+ "epoch": 8.82,
4693
+ "learning_rate": 8.90625e-05,
4694
+ "loss": 0.5649,
4695
+ "step": 781
4696
+ },
4697
+ {
4698
+ "epoch": 8.83,
4699
+ "learning_rate": 8.892045454545455e-05,
4700
+ "loss": 0.5618,
4701
+ "step": 782
4702
+ },
4703
+ {
4704
+ "epoch": 8.85,
4705
+ "learning_rate": 8.87784090909091e-05,
4706
+ "loss": 0.5602,
4707
+ "step": 783
4708
+ },
4709
+ {
4710
+ "epoch": 8.86,
4711
+ "learning_rate": 8.863636363636364e-05,
4712
+ "loss": 0.5723,
4713
+ "step": 784
4714
+ },
4715
+ {
4716
+ "epoch": 8.87,
4717
+ "learning_rate": 8.849431818181818e-05,
4718
+ "loss": 0.5816,
4719
+ "step": 785
4720
+ },
4721
+ {
4722
+ "epoch": 8.88,
4723
+ "learning_rate": 8.835227272727273e-05,
4724
+ "loss": 0.555,
4725
+ "step": 786
4726
+ },
4727
+ {
4728
+ "epoch": 8.89,
4729
+ "learning_rate": 8.821022727272727e-05,
4730
+ "loss": 0.5563,
4731
+ "step": 787
4732
+ },
4733
+ {
4734
+ "epoch": 8.9,
4735
+ "learning_rate": 8.806818181818183e-05,
4736
+ "loss": 0.554,
4737
+ "step": 788
4738
+ },
4739
+ {
4740
+ "epoch": 8.91,
4741
+ "learning_rate": 8.792613636363636e-05,
4742
+ "loss": 0.5671,
4743
+ "step": 789
4744
+ },
4745
+ {
4746
+ "epoch": 8.92,
4747
+ "learning_rate": 8.778409090909091e-05,
4748
+ "loss": 0.5485,
4749
+ "step": 790
4750
+ },
4751
+ {
4752
+ "epoch": 8.94,
4753
+ "learning_rate": 8.764204545454547e-05,
4754
+ "loss": 0.5712,
4755
+ "step": 791
4756
+ },
4757
+ {
4758
+ "epoch": 8.95,
4759
+ "learning_rate": 8.75e-05,
4760
+ "loss": 0.5507,
4761
+ "step": 792
4762
+ },
4763
+ {
4764
+ "epoch": 8.96,
4765
+ "learning_rate": 8.735795454545454e-05,
4766
+ "loss": 0.5718,
4767
+ "step": 793
4768
+ },
4769
+ {
4770
+ "epoch": 8.97,
4771
+ "learning_rate": 8.72159090909091e-05,
4772
+ "loss": 0.5585,
4773
+ "step": 794
4774
+ },
4775
+ {
4776
+ "epoch": 8.98,
4777
+ "learning_rate": 8.707386363636363e-05,
4778
+ "loss": 0.5563,
4779
+ "step": 795
4780
+ },
4781
+ {
4782
+ "epoch": 8.99,
4783
+ "learning_rate": 8.693181818181818e-05,
4784
+ "loss": 0.581,
4785
+ "step": 796
4786
+ },
4787
+ {
4788
+ "epoch": 9.0,
4789
+ "learning_rate": 8.678977272727274e-05,
4790
+ "loss": 0.5511,
4791
+ "step": 797
4792
+ },
4793
+ {
4794
+ "epoch": 9.02,
4795
+ "learning_rate": 8.664772727272727e-05,
4796
+ "loss": 0.5103,
4797
+ "step": 798
4798
+ },
4799
+ {
4800
+ "epoch": 9.03,
4801
+ "learning_rate": 8.650568181818183e-05,
4802
+ "loss": 0.5323,
4803
+ "step": 799
4804
+ },
4805
+ {
4806
+ "epoch": 9.04,
4807
+ "learning_rate": 8.636363636363637e-05,
4808
+ "loss": 0.5092,
4809
+ "step": 800
4810
+ },
4811
+ {
4812
+ "epoch": 9.05,
4813
+ "learning_rate": 8.62215909090909e-05,
4814
+ "loss": 0.5247,
4815
+ "step": 801
4816
+ },
4817
+ {
4818
+ "epoch": 9.06,
4819
+ "learning_rate": 8.607954545454546e-05,
4820
+ "loss": 0.5403,
4821
+ "step": 802
4822
+ },
4823
+ {
4824
+ "epoch": 9.07,
4825
+ "learning_rate": 8.593750000000001e-05,
4826
+ "loss": 0.5252,
4827
+ "step": 803
4828
+ },
4829
+ {
4830
+ "epoch": 9.08,
4831
+ "learning_rate": 8.579545454545454e-05,
4832
+ "loss": 0.5296,
4833
+ "step": 804
4834
+ },
4835
+ {
4836
+ "epoch": 9.09,
4837
+ "learning_rate": 8.56534090909091e-05,
4838
+ "loss": 0.5223,
4839
+ "step": 805
4840
+ },
4841
+ {
4842
+ "epoch": 9.11,
4843
+ "learning_rate": 8.551136363636364e-05,
4844
+ "loss": 0.4972,
4845
+ "step": 806
4846
+ },
4847
+ {
4848
+ "epoch": 9.12,
4849
+ "learning_rate": 8.536931818181818e-05,
4850
+ "loss": 0.5005,
4851
+ "step": 807
4852
+ },
4853
+ {
4854
+ "epoch": 9.13,
4855
+ "learning_rate": 8.522727272727273e-05,
4856
+ "loss": 0.5249,
4857
+ "step": 808
4858
+ },
4859
+ {
4860
+ "epoch": 9.14,
4861
+ "learning_rate": 8.508522727272728e-05,
4862
+ "loss": 0.5135,
4863
+ "step": 809
4864
+ },
4865
+ {
4866
+ "epoch": 9.15,
4867
+ "learning_rate": 8.494318181818182e-05,
4868
+ "loss": 0.5053,
4869
+ "step": 810
4870
+ },
4871
+ {
4872
+ "epoch": 9.16,
4873
+ "learning_rate": 8.480113636363637e-05,
4874
+ "loss": 0.5158,
4875
+ "step": 811
4876
+ },
4877
+ {
4878
+ "epoch": 9.17,
4879
+ "learning_rate": 8.465909090909091e-05,
4880
+ "loss": 0.5061,
4881
+ "step": 812
4882
+ },
4883
+ {
4884
+ "epoch": 9.18,
4885
+ "learning_rate": 8.451704545454546e-05,
4886
+ "loss": 0.4988,
4887
+ "step": 813
4888
+ },
4889
+ {
4890
+ "epoch": 9.2,
4891
+ "learning_rate": 8.4375e-05,
4892
+ "loss": 0.5273,
4893
+ "step": 814
4894
+ },
4895
+ {
4896
+ "epoch": 9.21,
4897
+ "learning_rate": 8.423295454545455e-05,
4898
+ "loss": 0.5332,
4899
+ "step": 815
4900
+ },
4901
+ {
4902
+ "epoch": 9.22,
4903
+ "learning_rate": 8.40909090909091e-05,
4904
+ "loss": 0.5181,
4905
+ "step": 816
4906
+ },
4907
+ {
4908
+ "epoch": 9.23,
4909
+ "learning_rate": 8.394886363636364e-05,
4910
+ "loss": 0.5085,
4911
+ "step": 817
4912
+ },
4913
+ {
4914
+ "epoch": 9.24,
4915
+ "learning_rate": 8.380681818181818e-05,
4916
+ "loss": 0.5137,
4917
+ "step": 818
4918
+ },
4919
+ {
4920
+ "epoch": 9.25,
4921
+ "learning_rate": 8.366477272727273e-05,
4922
+ "loss": 0.5195,
4923
+ "step": 819
4924
+ },
4925
+ {
4926
+ "epoch": 9.26,
4927
+ "learning_rate": 8.352272727272727e-05,
4928
+ "loss": 0.5077,
4929
+ "step": 820
4930
+ },
4931
+ {
4932
+ "epoch": 9.28,
4933
+ "learning_rate": 8.338068181818183e-05,
4934
+ "loss": 0.5074,
4935
+ "step": 821
4936
+ },
4937
+ {
4938
+ "epoch": 9.29,
4939
+ "learning_rate": 8.323863636363637e-05,
4940
+ "loss": 0.5142,
4941
+ "step": 822
4942
+ },
4943
+ {
4944
+ "epoch": 9.3,
4945
+ "learning_rate": 8.309659090909091e-05,
4946
+ "loss": 0.5116,
4947
+ "step": 823
4948
+ },
4949
+ {
4950
+ "epoch": 9.31,
4951
+ "learning_rate": 8.295454545454547e-05,
4952
+ "loss": 0.4974,
4953
+ "step": 824
4954
+ },
4955
+ {
4956
+ "epoch": 9.32,
4957
+ "learning_rate": 8.28125e-05,
4958
+ "loss": 0.5117,
4959
+ "step": 825
4960
+ },
4961
+ {
4962
+ "epoch": 9.33,
4963
+ "learning_rate": 8.267045454545455e-05,
4964
+ "loss": 0.5114,
4965
+ "step": 826
4966
+ },
4967
+ {
4968
+ "epoch": 9.34,
4969
+ "learning_rate": 8.25284090909091e-05,
4970
+ "loss": 0.5039,
4971
+ "step": 827
4972
+ },
4973
+ {
4974
+ "epoch": 9.35,
4975
+ "learning_rate": 8.238636363636364e-05,
4976
+ "loss": 0.498,
4977
+ "step": 828
4978
+ },
4979
+ {
4980
+ "epoch": 9.37,
4981
+ "learning_rate": 8.224431818181818e-05,
4982
+ "loss": 0.5042,
4983
+ "step": 829
4984
+ },
4985
+ {
4986
+ "epoch": 9.38,
4987
+ "learning_rate": 8.210227272727274e-05,
4988
+ "loss": 0.5049,
4989
+ "step": 830
4990
+ },
4991
+ {
4992
+ "epoch": 9.39,
4993
+ "learning_rate": 8.196022727272727e-05,
4994
+ "loss": 0.5123,
4995
+ "step": 831
4996
+ },
4997
+ {
4998
+ "epoch": 9.4,
4999
+ "learning_rate": 8.181818181818183e-05,
5000
+ "loss": 0.4907,
5001
+ "step": 832
5002
+ },
5003
+ {
5004
+ "epoch": 9.41,
5005
+ "learning_rate": 8.167613636363637e-05,
5006
+ "loss": 0.5267,
5007
+ "step": 833
5008
+ },
5009
+ {
5010
+ "epoch": 9.42,
5011
+ "learning_rate": 8.15340909090909e-05,
5012
+ "loss": 0.5314,
5013
+ "step": 834
5014
+ },
5015
+ {
5016
+ "epoch": 9.43,
5017
+ "learning_rate": 8.139204545454546e-05,
5018
+ "loss": 0.4952,
5019
+ "step": 835
5020
+ },
5021
+ {
5022
+ "epoch": 9.44,
5023
+ "learning_rate": 8.125000000000001e-05,
5024
+ "loss": 0.5014,
5025
+ "step": 836
5026
+ },
5027
+ {
5028
+ "epoch": 9.46,
5029
+ "learning_rate": 8.110795454545454e-05,
5030
+ "loss": 0.4967,
5031
+ "step": 837
5032
+ },
5033
+ {
5034
+ "epoch": 9.47,
5035
+ "learning_rate": 8.09659090909091e-05,
5036
+ "loss": 0.5116,
5037
+ "step": 838
5038
+ },
5039
+ {
5040
+ "epoch": 9.48,
5041
+ "learning_rate": 8.082386363636365e-05,
5042
+ "loss": 0.5119,
5043
+ "step": 839
5044
+ },
5045
+ {
5046
+ "epoch": 9.49,
5047
+ "learning_rate": 8.068181818181818e-05,
5048
+ "loss": 0.4987,
5049
+ "step": 840
5050
+ },
5051
+ {
5052
+ "epoch": 9.5,
5053
+ "learning_rate": 8.053977272727274e-05,
5054
+ "loss": 0.5063,
5055
+ "step": 841
5056
+ },
5057
+ {
5058
+ "epoch": 9.51,
5059
+ "learning_rate": 8.039772727272728e-05,
5060
+ "loss": 0.5019,
5061
+ "step": 842
5062
+ },
5063
+ {
5064
+ "epoch": 9.52,
5065
+ "learning_rate": 8.025568181818183e-05,
5066
+ "loss": 0.5272,
5067
+ "step": 843
5068
+ },
5069
+ {
5070
+ "epoch": 9.54,
5071
+ "learning_rate": 8.011363636363637e-05,
5072
+ "loss": 0.4969,
5073
+ "step": 844
5074
+ },
5075
+ {
5076
+ "epoch": 9.55,
5077
+ "learning_rate": 7.997159090909092e-05,
5078
+ "loss": 0.5222,
5079
+ "step": 845
5080
+ },
5081
+ {
5082
+ "epoch": 9.56,
5083
+ "learning_rate": 7.982954545454546e-05,
5084
+ "loss": 0.4729,
5085
+ "step": 846
5086
+ },
5087
+ {
5088
+ "epoch": 9.57,
5089
+ "learning_rate": 7.96875e-05,
5090
+ "loss": 0.4976,
5091
+ "step": 847
5092
+ },
5093
+ {
5094
+ "epoch": 9.58,
5095
+ "learning_rate": 7.954545454545455e-05,
5096
+ "loss": 0.4974,
5097
+ "step": 848
5098
+ },
5099
+ {
5100
+ "epoch": 9.59,
5101
+ "learning_rate": 7.94034090909091e-05,
5102
+ "loss": 0.4849,
5103
+ "step": 849
5104
+ },
5105
+ {
5106
+ "epoch": 9.6,
5107
+ "learning_rate": 7.926136363636364e-05,
5108
+ "loss": 0.4897,
5109
+ "step": 850
5110
+ },
5111
+ {
5112
+ "epoch": 9.61,
5113
+ "learning_rate": 7.911931818181819e-05,
5114
+ "loss": 0.4962,
5115
+ "step": 851
5116
+ },
5117
+ {
5118
+ "epoch": 9.63,
5119
+ "learning_rate": 7.897727272727273e-05,
5120
+ "loss": 0.4877,
5121
+ "step": 852
5122
+ },
5123
+ {
5124
+ "epoch": 9.64,
5125
+ "learning_rate": 7.883522727272728e-05,
5126
+ "loss": 0.4921,
5127
+ "step": 853
5128
+ },
5129
+ {
5130
+ "epoch": 9.65,
5131
+ "learning_rate": 7.869318181818182e-05,
5132
+ "loss": 0.4969,
5133
+ "step": 854
5134
+ },
5135
+ {
5136
+ "epoch": 9.66,
5137
+ "learning_rate": 7.855113636363637e-05,
5138
+ "loss": 0.5045,
5139
+ "step": 855
5140
+ },
5141
+ {
5142
+ "epoch": 9.67,
5143
+ "learning_rate": 7.840909090909091e-05,
5144
+ "loss": 0.5207,
5145
+ "step": 856
5146
+ },
5147
+ {
5148
+ "epoch": 9.68,
5149
+ "learning_rate": 7.826704545454546e-05,
5150
+ "loss": 0.5098,
5151
+ "step": 857
5152
+ },
5153
+ {
5154
+ "epoch": 9.69,
5155
+ "learning_rate": 7.8125e-05,
5156
+ "loss": 0.5005,
5157
+ "step": 858
5158
+ },
5159
+ {
5160
+ "epoch": 9.7,
5161
+ "learning_rate": 7.798295454545455e-05,
5162
+ "loss": 0.5028,
5163
+ "step": 859
5164
+ },
5165
+ {
5166
+ "epoch": 9.72,
5167
+ "learning_rate": 7.784090909090909e-05,
5168
+ "loss": 0.5067,
5169
+ "step": 860
5170
+ },
5171
+ {
5172
+ "epoch": 9.73,
5173
+ "learning_rate": 7.769886363636364e-05,
5174
+ "loss": 0.484,
5175
+ "step": 861
5176
+ },
5177
+ {
5178
+ "epoch": 9.74,
5179
+ "learning_rate": 7.755681818181818e-05,
5180
+ "loss": 0.5029,
5181
+ "step": 862
5182
+ },
5183
+ {
5184
+ "epoch": 9.75,
5185
+ "learning_rate": 7.741477272727273e-05,
5186
+ "loss": 0.5077,
5187
+ "step": 863
5188
+ },
5189
+ {
5190
+ "epoch": 9.76,
5191
+ "learning_rate": 7.727272727272727e-05,
5192
+ "loss": 0.5091,
5193
+ "step": 864
5194
+ },
5195
+ {
5196
+ "epoch": 9.77,
5197
+ "learning_rate": 7.713068181818183e-05,
5198
+ "loss": 0.4781,
5199
+ "step": 865
5200
+ },
5201
+ {
5202
+ "epoch": 9.78,
5203
+ "learning_rate": 7.698863636363636e-05,
5204
+ "loss": 0.5124,
5205
+ "step": 866
5206
+ },
5207
+ {
5208
+ "epoch": 9.79,
5209
+ "learning_rate": 7.684659090909091e-05,
5210
+ "loss": 0.4859,
5211
+ "step": 867
5212
+ },
5213
+ {
5214
+ "epoch": 9.81,
5215
+ "learning_rate": 7.670454545454547e-05,
5216
+ "loss": 0.4872,
5217
+ "step": 868
5218
+ },
5219
+ {
5220
+ "epoch": 9.82,
5221
+ "learning_rate": 7.65625e-05,
5222
+ "loss": 0.4675,
5223
+ "step": 869
5224
+ },
5225
+ {
5226
+ "epoch": 9.83,
5227
+ "learning_rate": 7.642045454545454e-05,
5228
+ "loss": 0.5056,
5229
+ "step": 870
5230
+ },
5231
+ {
5232
+ "epoch": 9.84,
5233
+ "learning_rate": 7.62784090909091e-05,
5234
+ "loss": 0.4868,
5235
+ "step": 871
5236
+ },
5237
+ {
5238
+ "epoch": 9.85,
5239
+ "learning_rate": 7.613636363636363e-05,
5240
+ "loss": 0.4907,
5241
+ "step": 872
5242
+ },
5243
+ {
5244
+ "epoch": 9.86,
5245
+ "learning_rate": 7.599431818181818e-05,
5246
+ "loss": 0.474,
5247
+ "step": 873
5248
+ },
5249
+ {
5250
+ "epoch": 9.87,
5251
+ "learning_rate": 7.585227272727274e-05,
5252
+ "loss": 0.4813,
5253
+ "step": 874
5254
+ },
5255
+ {
5256
+ "epoch": 9.89,
5257
+ "learning_rate": 7.571022727272727e-05,
5258
+ "loss": 0.4838,
5259
+ "step": 875
5260
+ },
5261
+ {
5262
+ "epoch": 9.9,
5263
+ "learning_rate": 7.556818181818183e-05,
5264
+ "loss": 0.4935,
5265
+ "step": 876
5266
+ },
5267
+ {
5268
+ "epoch": 9.91,
5269
+ "learning_rate": 7.542613636363637e-05,
5270
+ "loss": 0.4884,
5271
+ "step": 877
5272
+ },
5273
+ {
5274
+ "epoch": 9.92,
5275
+ "learning_rate": 7.52840909090909e-05,
5276
+ "loss": 0.4797,
5277
+ "step": 878
5278
+ },
5279
+ {
5280
+ "epoch": 9.93,
5281
+ "learning_rate": 7.514204545454546e-05,
5282
+ "loss": 0.479,
5283
+ "step": 879
5284
+ },
5285
+ {
5286
+ "epoch": 9.94,
5287
+ "learning_rate": 7.500000000000001e-05,
5288
+ "loss": 0.4727,
5289
+ "step": 880
5290
+ },
5291
+ {
5292
+ "epoch": 9.95,
5293
+ "learning_rate": 7.485795454545454e-05,
5294
+ "loss": 0.4758,
5295
+ "step": 881
5296
+ },
5297
+ {
5298
+ "epoch": 9.96,
5299
+ "learning_rate": 7.47159090909091e-05,
5300
+ "loss": 0.482,
5301
+ "step": 882
5302
+ },
5303
+ {
5304
+ "epoch": 9.98,
5305
+ "learning_rate": 7.457386363636364e-05,
5306
+ "loss": 0.4951,
5307
+ "step": 883
5308
+ },
5309
+ {
5310
+ "epoch": 9.99,
5311
+ "learning_rate": 7.443181818181817e-05,
5312
+ "loss": 0.4823,
5313
+ "step": 884
5314
+ },
5315
+ {
5316
+ "epoch": 10.0,
5317
+ "learning_rate": 7.428977272727273e-05,
5318
+ "loss": 0.4638,
5319
+ "step": 885
5320
+ },
5321
+ {
5322
+ "epoch": 10.01,
5323
+ "learning_rate": 7.414772727272728e-05,
5324
+ "loss": 0.4715,
5325
+ "step": 886
5326
+ },
5327
+ {
5328
+ "epoch": 10.02,
5329
+ "learning_rate": 7.400568181818182e-05,
5330
+ "loss": 0.461,
5331
+ "step": 887
5332
+ },
5333
+ {
5334
+ "epoch": 10.03,
5335
+ "learning_rate": 7.386363636363637e-05,
5336
+ "loss": 0.4429,
5337
+ "step": 888
5338
+ },
5339
+ {
5340
+ "epoch": 10.04,
5341
+ "learning_rate": 7.372159090909091e-05,
5342
+ "loss": 0.4403,
5343
+ "step": 889
5344
+ },
5345
+ {
5346
+ "epoch": 10.05,
5347
+ "learning_rate": 7.357954545454546e-05,
5348
+ "loss": 0.4519,
5349
+ "step": 890
5350
+ },
5351
+ {
5352
+ "epoch": 10.07,
5353
+ "learning_rate": 7.34375e-05,
5354
+ "loss": 0.4611,
5355
+ "step": 891
5356
+ },
5357
+ {
5358
+ "epoch": 10.08,
5359
+ "learning_rate": 7.329545454545455e-05,
5360
+ "loss": 0.4543,
5361
+ "step": 892
5362
+ },
5363
+ {
5364
+ "epoch": 10.09,
5365
+ "learning_rate": 7.315340909090909e-05,
5366
+ "loss": 0.4528,
5367
+ "step": 893
5368
+ },
5369
+ {
5370
+ "epoch": 10.1,
5371
+ "learning_rate": 7.301136363636364e-05,
5372
+ "loss": 0.4586,
5373
+ "step": 894
5374
+ },
5375
+ {
5376
+ "epoch": 10.11,
5377
+ "learning_rate": 7.286931818181818e-05,
5378
+ "loss": 0.4418,
5379
+ "step": 895
5380
+ },
5381
+ {
5382
+ "epoch": 10.12,
5383
+ "learning_rate": 7.272727272727273e-05,
5384
+ "loss": 0.4435,
5385
+ "step": 896
5386
+ },
5387
+ {
5388
+ "epoch": 10.13,
5389
+ "learning_rate": 7.258522727272727e-05,
5390
+ "loss": 0.44,
5391
+ "step": 897
5392
+ },
5393
+ {
5394
+ "epoch": 10.15,
5395
+ "learning_rate": 7.244318181818183e-05,
5396
+ "loss": 0.4589,
5397
+ "step": 898
5398
+ },
5399
+ {
5400
+ "epoch": 10.16,
5401
+ "learning_rate": 7.230113636363636e-05,
5402
+ "loss": 0.4597,
5403
+ "step": 899
5404
+ },
5405
+ {
5406
+ "epoch": 10.17,
5407
+ "learning_rate": 7.215909090909091e-05,
5408
+ "loss": 0.4479,
5409
+ "step": 900
  }
  ],
  "logging_steps": 1,
  "max_steps": 1408,
  "num_train_epochs": 16,
  "save_steps": 100,
- "total_flos": 9.550264840009421e+17,
+ "total_flos": 1.227986631604562e+18,
  "trial_name": null,
  "trial_params": null
  }
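The logged learning rates are consistent with a linear decay to zero over max_steps = 1408 from an inferred 2e-4 peak (the peak itself is not shown in this hunk, so treat it as an assumption). A quick check against the two steps quoted above:

# Assumed schedule: linear decay from peak_lr to 0 over max_steps.
peak_lr, max_steps = 2e-4, 1408  # peak_lr inferred; max_steps is from trainer_state.json

def linear_lr(step):
    return peak_lr * (max_steps - step) / max_steps

print(linear_lr(700))  # ~1.0056818e-04, cf. the step-700 entry
print(linear_lr(900))  # ~7.2159091e-05, cf. the step-900 entry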