Your name committed on
Commit
ebfc8f8
1 Parent(s): 9177a94

Initial commit

Browse files
Files changed (6) hide show
  1. config.json +1 -1
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +603 -3
  6. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/content/drive/MyDrive/ggpt2/checkpoint-180000",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
 
1
  {
2
+ "_name_or_path": "/content/drive/MyDrive/ggpt2/checkpoint-360000",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e44596804f6bbe6223dc387b01a078b883d9793f67e4205685fa7e6d1bac155b
3
  size 655348487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25aad45ca03f3ac4902671d5f30fcb071be626a4dddaf248bee4b6f553ec9a29
3
  size 655348487
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e235124e44f7eac920bcb6bf072b75b2cc2272733920b4b17d82b8d859b967
3
  size 333975623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c4d6d6d56f177aadbd34ba09347ff85284717d7a559aac93a6e51fad2a1d41
3
  size 333975623
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:765811e5c0de8679a19ba5175edb8739e017bd63da1b3261fbc6a9c624ff81ea
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617b3a41c1dd383619fbebee52cdee21b1f69f1f07f755cce1d5f1686ff9115a
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 98.41443411700382,
5
- "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4326,11 +4326,611 @@
4326
  "learning_rate": 4.5085440874914565e-05,
4327
  "loss": 2.0832,
4328
  "step": 360000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4329
  }
4330
  ],
4331
  "max_steps": 3658000,
4332
  "num_train_epochs": 1000,
4333
- "total_flos": 724673057990639616,
4334
  "trial_name": null,
4335
  "trial_params": null
4336
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 112.08310552214324,
5
+ "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4326
  "learning_rate": 4.5085440874914565e-05,
4327
  "loss": 2.0832,
4328
  "step": 360000
4329
+ },
4330
+ {
4331
+ "epoch": 98.55,
4332
+ "learning_rate": 4.50786056049214e-05,
4333
+ "loss": 0.0029,
4334
+ "step": 360500
4335
+ },
4336
+ {
4337
+ "epoch": 98.69,
4338
+ "learning_rate": 4.5071770334928234e-05,
4339
+ "loss": 2.0982,
4340
+ "step": 361000
4341
+ },
4342
+ {
4343
+ "epoch": 98.82,
4344
+ "learning_rate": 4.506493506493506e-05,
4345
+ "loss": 2.0996,
4346
+ "step": 361500
4347
+ },
4348
+ {
4349
+ "epoch": 98.96,
4350
+ "learning_rate": 4.5058099794941904e-05,
4351
+ "loss": 2.1084,
4352
+ "step": 362000
4353
+ },
4354
+ {
4355
+ "epoch": 99.1,
4356
+ "learning_rate": 4.505126452494874e-05,
4357
+ "loss": 2.0753,
4358
+ "step": 362500
4359
+ },
4360
+ {
4361
+ "epoch": 99.23,
4362
+ "learning_rate": 4.504442925495557e-05,
4363
+ "loss": 2.0645,
4364
+ "step": 363000
4365
+ },
4366
+ {
4367
+ "epoch": 99.37,
4368
+ "learning_rate": 4.503759398496241e-05,
4369
+ "loss": 2.0722,
4370
+ "step": 363500
4371
+ },
4372
+ {
4373
+ "epoch": 99.51,
4374
+ "learning_rate": 4.503075871496924e-05,
4375
+ "loss": 2.0849,
4376
+ "step": 364000
4377
+ },
4378
+ {
4379
+ "epoch": 99.64,
4380
+ "learning_rate": 4.502392344497608e-05,
4381
+ "loss": 2.0903,
4382
+ "step": 364500
4383
+ },
4384
+ {
4385
+ "epoch": 99.78,
4386
+ "learning_rate": 4.501708817498291e-05,
4387
+ "loss": 2.0973,
4388
+ "step": 365000
4389
+ },
4390
+ {
4391
+ "epoch": 99.92,
4392
+ "learning_rate": 4.501025290498975e-05,
4393
+ "loss": 2.1043,
4394
+ "step": 365500
4395
+ },
4396
+ {
4397
+ "epoch": 100.05,
4398
+ "learning_rate": 4.500341763499659e-05,
4399
+ "loss": 2.0809,
4400
+ "step": 366000
4401
+ },
4402
+ {
4403
+ "epoch": 100.19,
4404
+ "learning_rate": 4.499658236500342e-05,
4405
+ "loss": 2.0633,
4406
+ "step": 366500
4407
+ },
4408
+ {
4409
+ "epoch": 100.33,
4410
+ "learning_rate": 4.498974709501025e-05,
4411
+ "loss": 2.0694,
4412
+ "step": 367000
4413
+ },
4414
+ {
4415
+ "epoch": 100.46,
4416
+ "learning_rate": 4.4982911825017086e-05,
4417
+ "loss": 2.0763,
4418
+ "step": 367500
4419
+ },
4420
+ {
4421
+ "epoch": 100.6,
4422
+ "learning_rate": 4.497607655502393e-05,
4423
+ "loss": 2.0849,
4424
+ "step": 368000
4425
+ },
4426
+ {
4427
+ "epoch": 100.74,
4428
+ "learning_rate": 4.496924128503076e-05,
4429
+ "loss": 2.0855,
4430
+ "step": 368500
4431
+ },
4432
+ {
4433
+ "epoch": 100.87,
4434
+ "learning_rate": 4.49624060150376e-05,
4435
+ "loss": 2.0935,
4436
+ "step": 369000
4437
+ },
4438
+ {
4439
+ "epoch": 101.01,
4440
+ "learning_rate": 4.495557074504443e-05,
4441
+ "loss": 2.1002,
4442
+ "step": 369500
4443
+ },
4444
+ {
4445
+ "epoch": 101.15,
4446
+ "learning_rate": 4.494873547505127e-05,
4447
+ "loss": 2.0488,
4448
+ "step": 370000
4449
+ },
4450
+ {
4451
+ "epoch": 101.28,
4452
+ "learning_rate": 4.49419002050581e-05,
4453
+ "loss": 2.0613,
4454
+ "step": 370500
4455
+ },
4456
+ {
4457
+ "epoch": 101.42,
4458
+ "learning_rate": 4.493506493506494e-05,
4459
+ "loss": 2.0637,
4460
+ "step": 371000
4461
+ },
4462
+ {
4463
+ "epoch": 101.56,
4464
+ "learning_rate": 4.492822966507177e-05,
4465
+ "loss": 2.0769,
4466
+ "step": 371500
4467
+ },
4468
+ {
4469
+ "epoch": 101.69,
4470
+ "learning_rate": 4.4921394395078606e-05,
4471
+ "loss": 2.086,
4472
+ "step": 372000
4473
+ },
4474
+ {
4475
+ "epoch": 101.83,
4476
+ "learning_rate": 4.491455912508544e-05,
4477
+ "loss": 2.0914,
4478
+ "step": 372500
4479
+ },
4480
+ {
4481
+ "epoch": 101.97,
4482
+ "learning_rate": 4.4907723855092276e-05,
4483
+ "loss": 2.0974,
4484
+ "step": 373000
4485
+ },
4486
+ {
4487
+ "epoch": 102.1,
4488
+ "learning_rate": 4.490088858509912e-05,
4489
+ "loss": 2.0534,
4490
+ "step": 373500
4491
+ },
4492
+ {
4493
+ "epoch": 102.24,
4494
+ "learning_rate": 4.489405331510595e-05,
4495
+ "loss": 2.0527,
4496
+ "step": 374000
4497
+ },
4498
+ {
4499
+ "epoch": 102.38,
4500
+ "learning_rate": 4.488721804511278e-05,
4501
+ "loss": 2.0637,
4502
+ "step": 374500
4503
+ },
4504
+ {
4505
+ "epoch": 102.52,
4506
+ "learning_rate": 4.4880382775119615e-05,
4507
+ "loss": 2.0692,
4508
+ "step": 375000
4509
+ },
4510
+ {
4511
+ "epoch": 102.65,
4512
+ "learning_rate": 4.487354750512646e-05,
4513
+ "loss": 2.078,
4514
+ "step": 375500
4515
+ },
4516
+ {
4517
+ "epoch": 102.79,
4518
+ "learning_rate": 4.486671223513329e-05,
4519
+ "loss": 2.0788,
4520
+ "step": 376000
4521
+ },
4522
+ {
4523
+ "epoch": 102.93,
4524
+ "learning_rate": 4.4859876965140126e-05,
4525
+ "loss": 2.089,
4526
+ "step": 376500
4527
+ },
4528
+ {
4529
+ "epoch": 103.06,
4530
+ "learning_rate": 4.485304169514696e-05,
4531
+ "loss": 2.0704,
4532
+ "step": 377000
4533
+ },
4534
+ {
4535
+ "epoch": 103.2,
4536
+ "learning_rate": 4.4846206425153796e-05,
4537
+ "loss": 2.0524,
4538
+ "step": 377500
4539
+ },
4540
+ {
4541
+ "epoch": 103.34,
4542
+ "learning_rate": 4.483937115516063e-05,
4543
+ "loss": 2.0481,
4544
+ "step": 378000
4545
+ },
4546
+ {
4547
+ "epoch": 103.47,
4548
+ "learning_rate": 4.4832535885167465e-05,
4549
+ "loss": 2.0607,
4550
+ "step": 378500
4551
+ },
4552
+ {
4553
+ "epoch": 103.61,
4554
+ "learning_rate": 4.482570061517431e-05,
4555
+ "loss": 2.0742,
4556
+ "step": 379000
4557
+ },
4558
+ {
4559
+ "epoch": 103.75,
4560
+ "learning_rate": 4.4818865345181135e-05,
4561
+ "loss": 2.0698,
4562
+ "step": 379500
4563
+ },
4564
+ {
4565
+ "epoch": 103.88,
4566
+ "learning_rate": 4.481203007518797e-05,
4567
+ "loss": 2.0826,
4568
+ "step": 380000
4569
+ },
4570
+ {
4571
+ "epoch": 104.02,
4572
+ "learning_rate": 4.4805194805194805e-05,
4573
+ "loss": 2.0789,
4574
+ "step": 380500
4575
+ },
4576
+ {
4577
+ "epoch": 104.16,
4578
+ "learning_rate": 4.4798359535201646e-05,
4579
+ "loss": 2.0342,
4580
+ "step": 381000
4581
+ },
4582
+ {
4583
+ "epoch": 104.29,
4584
+ "learning_rate": 4.479152426520848e-05,
4585
+ "loss": 2.0422,
4586
+ "step": 381500
4587
+ },
4588
+ {
4589
+ "epoch": 104.43,
4590
+ "learning_rate": 4.4784688995215316e-05,
4591
+ "loss": 2.0582,
4592
+ "step": 382000
4593
+ },
4594
+ {
4595
+ "epoch": 104.57,
4596
+ "learning_rate": 4.4777853725222144e-05,
4597
+ "loss": 2.0651,
4598
+ "step": 382500
4599
+ },
4600
+ {
4601
+ "epoch": 104.7,
4602
+ "learning_rate": 4.4771018455228985e-05,
4603
+ "loss": 2.0687,
4604
+ "step": 383000
4605
+ },
4606
+ {
4607
+ "epoch": 104.84,
4608
+ "learning_rate": 4.476418318523582e-05,
4609
+ "loss": 2.0763,
4610
+ "step": 383500
4611
+ },
4612
+ {
4613
+ "epoch": 104.98,
4614
+ "learning_rate": 4.4757347915242655e-05,
4615
+ "loss": 2.0866,
4616
+ "step": 384000
4617
+ },
4618
+ {
4619
+ "epoch": 105.11,
4620
+ "learning_rate": 4.475051264524949e-05,
4621
+ "loss": 2.0387,
4622
+ "step": 384500
4623
+ },
4624
+ {
4625
+ "epoch": 105.25,
4626
+ "learning_rate": 4.4743677375256325e-05,
4627
+ "loss": 2.0398,
4628
+ "step": 385000
4629
+ },
4630
+ {
4631
+ "epoch": 105.39,
4632
+ "learning_rate": 4.473684210526316e-05,
4633
+ "loss": 2.0542,
4634
+ "step": 385500
4635
+ },
4636
+ {
4637
+ "epoch": 105.52,
4638
+ "learning_rate": 4.4730006835269994e-05,
4639
+ "loss": 2.0551,
4640
+ "step": 386000
4641
+ },
4642
+ {
4643
+ "epoch": 105.66,
4644
+ "learning_rate": 4.472317156527683e-05,
4645
+ "loss": 2.0639,
4646
+ "step": 386500
4647
+ },
4648
+ {
4649
+ "epoch": 105.8,
4650
+ "learning_rate": 4.471633629528367e-05,
4651
+ "loss": 2.0706,
4652
+ "step": 387000
4653
+ },
4654
+ {
4655
+ "epoch": 105.93,
4656
+ "learning_rate": 4.47095010252905e-05,
4657
+ "loss": 2.0742,
4658
+ "step": 387500
4659
+ },
4660
+ {
4661
+ "epoch": 106.07,
4662
+ "learning_rate": 4.470266575529733e-05,
4663
+ "loss": 2.0517,
4664
+ "step": 388000
4665
+ },
4666
+ {
4667
+ "epoch": 106.21,
4668
+ "learning_rate": 4.4695830485304175e-05,
4669
+ "loss": 2.0298,
4670
+ "step": 388500
4671
+ },
4672
+ {
4673
+ "epoch": 106.34,
4674
+ "learning_rate": 4.468899521531101e-05,
4675
+ "loss": 2.0385,
4676
+ "step": 389000
4677
+ },
4678
+ {
4679
+ "epoch": 106.48,
4680
+ "learning_rate": 4.4682159945317844e-05,
4681
+ "loss": 2.051,
4682
+ "step": 389500
4683
+ },
4684
+ {
4685
+ "epoch": 106.62,
4686
+ "learning_rate": 4.467532467532467e-05,
4687
+ "loss": 2.0592,
4688
+ "step": 390000
4689
+ },
4690
+ {
4691
+ "epoch": 106.75,
4692
+ "learning_rate": 4.4668489405331514e-05,
4693
+ "loss": 2.0676,
4694
+ "step": 390500
4695
+ },
4696
+ {
4697
+ "epoch": 106.89,
4698
+ "learning_rate": 4.466165413533835e-05,
4699
+ "loss": 2.0695,
4700
+ "step": 391000
4701
+ },
4702
+ {
4703
+ "epoch": 107.03,
4704
+ "learning_rate": 4.4654818865345184e-05,
4705
+ "loss": 2.0598,
4706
+ "step": 391500
4707
+ },
4708
+ {
4709
+ "epoch": 107.16,
4710
+ "learning_rate": 4.464798359535202e-05,
4711
+ "loss": 2.024,
4712
+ "step": 392000
4713
+ },
4714
+ {
4715
+ "epoch": 107.3,
4716
+ "learning_rate": 4.464114832535885e-05,
4717
+ "loss": 2.0372,
4718
+ "step": 392500
4719
+ },
4720
+ {
4721
+ "epoch": 107.44,
4722
+ "learning_rate": 4.463431305536569e-05,
4723
+ "loss": 2.0433,
4724
+ "step": 393000
4725
+ },
4726
+ {
4727
+ "epoch": 107.57,
4728
+ "learning_rate": 4.462747778537252e-05,
4729
+ "loss": 2.0472,
4730
+ "step": 393500
4731
+ },
4732
+ {
4733
+ "epoch": 107.71,
4734
+ "learning_rate": 4.462064251537936e-05,
4735
+ "loss": 2.0579,
4736
+ "step": 394000
4737
+ },
4738
+ {
4739
+ "epoch": 107.85,
4740
+ "learning_rate": 4.46138072453862e-05,
4741
+ "loss": 2.0605,
4742
+ "step": 394500
4743
+ },
4744
+ {
4745
+ "epoch": 107.98,
4746
+ "learning_rate": 4.460697197539303e-05,
4747
+ "loss": 2.0745,
4748
+ "step": 395000
4749
+ },
4750
+ {
4751
+ "epoch": 108.12,
4752
+ "learning_rate": 4.460013670539986e-05,
4753
+ "loss": 2.026,
4754
+ "step": 395500
4755
+ },
4756
+ {
4757
+ "epoch": 108.26,
4758
+ "learning_rate": 4.45933014354067e-05,
4759
+ "loss": 2.0251,
4760
+ "step": 396000
4761
+ },
4762
+ {
4763
+ "epoch": 108.39,
4764
+ "learning_rate": 4.458646616541354e-05,
4765
+ "loss": 2.0438,
4766
+ "step": 396500
4767
+ },
4768
+ {
4769
+ "epoch": 108.53,
4770
+ "learning_rate": 4.457963089542037e-05,
4771
+ "loss": 2.0407,
4772
+ "step": 397000
4773
+ },
4774
+ {
4775
+ "epoch": 108.67,
4776
+ "learning_rate": 4.457279562542721e-05,
4777
+ "loss": 2.0477,
4778
+ "step": 397500
4779
+ },
4780
+ {
4781
+ "epoch": 108.8,
4782
+ "learning_rate": 4.456596035543404e-05,
4783
+ "loss": 2.0536,
4784
+ "step": 398000
4785
+ },
4786
+ {
4787
+ "epoch": 108.94,
4788
+ "learning_rate": 4.455912508544088e-05,
4789
+ "loss": 2.065,
4790
+ "step": 398500
4791
+ },
4792
+ {
4793
+ "epoch": 109.08,
4794
+ "learning_rate": 4.455228981544771e-05,
4795
+ "loss": 2.0385,
4796
+ "step": 399000
4797
+ },
4798
+ {
4799
+ "epoch": 109.21,
4800
+ "learning_rate": 4.454545454545455e-05,
4801
+ "loss": 2.0245,
4802
+ "step": 399500
4803
+ },
4804
+ {
4805
+ "epoch": 109.35,
4806
+ "learning_rate": 4.453861927546138e-05,
4807
+ "loss": 2.0304,
4808
+ "step": 400000
4809
+ },
4810
+ {
4811
+ "epoch": 109.49,
4812
+ "learning_rate": 4.453178400546822e-05,
4813
+ "loss": 2.0395,
4814
+ "step": 400500
4815
+ },
4816
+ {
4817
+ "epoch": 109.62,
4818
+ "learning_rate": 4.452494873547505e-05,
4819
+ "loss": 2.0405,
4820
+ "step": 401000
4821
+ },
4822
+ {
4823
+ "epoch": 109.76,
4824
+ "learning_rate": 4.4518113465481886e-05,
4825
+ "loss": 2.0475,
4826
+ "step": 401500
4827
+ },
4828
+ {
4829
+ "epoch": 109.9,
4830
+ "learning_rate": 4.451127819548873e-05,
4831
+ "loss": 2.0538,
4832
+ "step": 402000
4833
+ },
4834
+ {
4835
+ "epoch": 110.03,
4836
+ "learning_rate": 4.450444292549556e-05,
4837
+ "loss": 2.0518,
4838
+ "step": 402500
4839
+ },
4840
+ {
4841
+ "epoch": 110.17,
4842
+ "learning_rate": 4.449760765550239e-05,
4843
+ "loss": 2.0132,
4844
+ "step": 403000
4845
+ },
4846
+ {
4847
+ "epoch": 110.31,
4848
+ "learning_rate": 4.4490772385509225e-05,
4849
+ "loss": 2.0259,
4850
+ "step": 403500
4851
+ },
4852
+ {
4853
+ "epoch": 110.44,
4854
+ "learning_rate": 4.448393711551607e-05,
4855
+ "loss": 2.0302,
4856
+ "step": 404000
4857
+ },
4858
+ {
4859
+ "epoch": 110.58,
4860
+ "learning_rate": 4.44771018455229e-05,
4861
+ "loss": 2.0392,
4862
+ "step": 404500
4863
+ },
4864
+ {
4865
+ "epoch": 110.72,
4866
+ "learning_rate": 4.4470266575529737e-05,
4867
+ "loss": 2.0429,
4868
+ "step": 405000
4869
+ },
4870
+ {
4871
+ "epoch": 110.85,
4872
+ "learning_rate": 4.446343130553657e-05,
4873
+ "loss": 2.0444,
4874
+ "step": 405500
4875
+ },
4876
+ {
4877
+ "epoch": 110.99,
4878
+ "learning_rate": 4.4456596035543406e-05,
4879
+ "loss": 2.0542,
4880
+ "step": 406000
4881
+ },
4882
+ {
4883
+ "epoch": 111.13,
4884
+ "learning_rate": 4.444976076555024e-05,
4885
+ "loss": 2.0086,
4886
+ "step": 406500
4887
+ },
4888
+ {
4889
+ "epoch": 111.26,
4890
+ "learning_rate": 4.4442925495557076e-05,
4891
+ "loss": 2.0216,
4892
+ "step": 407000
4893
+ },
4894
+ {
4895
+ "epoch": 111.4,
4896
+ "learning_rate": 4.443609022556392e-05,
4897
+ "loss": 2.0197,
4898
+ "step": 407500
4899
+ },
4900
+ {
4901
+ "epoch": 111.54,
4902
+ "learning_rate": 4.4429254955570745e-05,
4903
+ "loss": 2.032,
4904
+ "step": 408000
4905
+ },
4906
+ {
4907
+ "epoch": 111.67,
4908
+ "learning_rate": 4.442241968557758e-05,
4909
+ "loss": 2.0413,
4910
+ "step": 408500
4911
+ },
4912
+ {
4913
+ "epoch": 111.81,
4914
+ "learning_rate": 4.4415584415584415e-05,
4915
+ "loss": 2.0456,
4916
+ "step": 409000
4917
+ },
4918
+ {
4919
+ "epoch": 111.95,
4920
+ "learning_rate": 4.4408749145591257e-05,
4921
+ "loss": 2.047,
4922
+ "step": 409500
4923
+ },
4924
+ {
4925
+ "epoch": 112.08,
4926
+ "learning_rate": 4.440191387559809e-05,
4927
+ "loss": 2.0248,
4928
+ "step": 410000
4929
  }
4930
  ],
4931
  "max_steps": 3658000,
4932
  "num_train_epochs": 1000,
4933
+ "total_flos": 825321947035336704,
4934
  "trial_name": null,
4935
  "trial_params": null
4936
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:122f500370d7d743b16c3069debb5e9c3567f936c76c6d570364e341d541c114
3
  size 2031
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d2fc2ca4a3f894c9391bd5e40a96e46714817c7477b582da0320beb734993f
3
  size 2031