joelniklaus committed on
Commit
0f28af8
1 Parent(s): c2fb397

Training in progress, step 800000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:12427d8cb55bc137001ab5c9a8222ed6a6f243dbb40efea27baa36c1640602ec
+ oid sha256:fd4e9f97205769b9d327217534afeab7d15d8878377c4702248f6d8106176224
  size 885330713
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f7d237efb7293fe26e971dbe427368c6f6fbdef8f5d21e25a2265500a4e6fa4
+ oid sha256:9308899e7cf9b42f3d67f27af3fc47d5047d1474ee940ca97311078cf54325b8
  size 442678571
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dc61d70c21a4fc2ed632e1ea3c73eebca5da0e8af02c14be9957f3a9c9d54348
+ oid sha256:063012cc8e8259f90140aa24921bb350f0801958a91aea05b73d5ff385433a8e
  size 17563
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a4e406e7382112b0689f4693a38031e699bebf288b4ec12177f0b10ca11a3b6
+ oid sha256:5dfd958abf22782455e0945b592950503a89b2eff4aac0ecd4b072d0e9cd3f74
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ba18b7e75c348f39be1644097821a4dfa1ecd7782f3d73a5b96038989ffadeb2
+ oid sha256:a782142f42cbef3421597d05870d8435f13392f6658de5fc017128ff2f53ff61
  size 623
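Each of the checkpoint files above is tracked with Git LFS, so the diff only touches the pointer file: the `oid sha256:` line switches to the hash of the new step-800000 artifact while the reported byte size stays the same. As a quick sanity check after downloading the resolved blobs, a small script can confirm that a local file matches a pointer. This is a minimal illustrative sketch, not part of the commit; the local paths are placeholders.

```python
import hashlib
from pathlib import Path

def read_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file (version / oid / size lines)."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(pointer_path: str, blob_path: str) -> bool:
    """Check that the downloaded blob matches the pointer's sha256 oid and size."""
    fields = read_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    blob = Path(blob_path)
    digest = hashlib.sha256()
    with blob.open("rb") as f:
        # Hash in 1 MiB chunks so multi-GB files do not have to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and blob.stat().st_size == expected_size

# Hypothetical paths: the pointer as stored in the repo vs. the resolved LFS blob.
print(verify("last-checkpoint/optimizer.pt.pointer", "downloads/optimizer.pt"))
```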
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.139467,
- "global_step": 750000,
+ "epoch": 1.189467,
+ "global_step": 800000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4641,11 +4641,320 @@
  "eval_samples_per_second": 455.192,
  "eval_steps_per_second": 0.91,
  "step": 750000
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 1.6050007694457925e-05,
+ "loss": 0.8906,
+ "step": 751000
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 1.592892699662655e-05,
+ "loss": 0.7487,
+ "step": 752000
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 1.5808097506834524e-05,
+ "loss": 0.7857,
+ "step": 753000
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 1.5687641934210118e-05,
+ "loss": 0.7229,
+ "step": 754000
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 1.556768148850735e-05,
+ "loss": 0.7239,
+ "step": 755000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.544797732076107e-05,
+ "loss": 0.923,
+ "step": 756000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.532877014553799e-05,
+ "loss": 0.9589,
+ "step": 757000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.5209822613682983e-05,
+ "loss": 0.9368,
+ "step": 758000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.5091255541633964e-05,
+ "loss": 0.9942,
+ "step": 759000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.4973188220219254e-05,
+ "loss": 0.996,
+ "step": 760000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.4855385569805891e-05,
+ "loss": 0.898,
+ "step": 761000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.4738084481176312e-05,
+ "loss": 0.8023,
+ "step": 762000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.4621051400316382e-05,
+ "loss": 0.7635,
+ "step": 763000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.4504405217970129e-05,
+ "loss": 0.7792,
+ "step": 764000
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 1.4388263273453235e-05,
+ "loss": 0.8774,
+ "step": 765000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.4272509994685329e-05,
+ "loss": 1.0395,
+ "step": 766000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.4157031361942913e-05,
+ "loss": 1.0266,
+ "step": 767000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.4041944702162985e-05,
+ "loss": 1.0627,
+ "step": 768000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.3927251273914792e-05,
+ "loss": 1.083,
+ "step": 769000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.3813066432947708e-05,
+ "loss": 1.0811,
+ "step": 770000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.3699162829897188e-05,
+ "loss": 0.9505,
+ "step": 771000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.358576951490385e-05,
+ "loss": 0.9315,
+ "step": 772000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.3472660714582335e-05,
+ "loss": 0.9083,
+ "step": 773000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.3360063880794788e-05,
+ "loss": 0.8656,
+ "step": 774000
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 1.3247754826001119e-05,
+ "loss": 0.9627,
+ "step": 775000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.3135847687872443e-05,
+ "loss": 0.9883,
+ "step": 776000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.3024454992430079e-05,
+ "loss": 0.9516,
+ "step": 777000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2913354949650841e-05,
+ "loss": 1.044,
+ "step": 778000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2802881463850613e-05,
+ "loss": 1.0429,
+ "step": 779000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2692592964051836e-05,
+ "loss": 0.9851,
+ "step": 780000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2582712452079226e-05,
+ "loss": 0.8493,
+ "step": 781000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2473241129568458e-05,
+ "loss": 0.7932,
+ "step": 782000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.236428904923082e-05,
+ "loss": 0.702,
+ "step": 783000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2255639280464832e-05,
+ "loss": 0.6618,
+ "step": 784000
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 1.2147402277980474e-05,
+ "loss": 0.8469,
+ "step": 785000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.2039686841331998e-05,
+ "loss": 0.9126,
+ "step": 786000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1932278502155054e-05,
+ "loss": 0.8954,
+ "step": 787000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1825286465481434e-05,
+ "loss": 0.9188,
+ "step": 788000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1718818267007175e-05,
+ "loss": 0.9873,
+ "step": 789000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1612661921699398e-05,
+ "loss": 0.9544,
+ "step": 790000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.150703090064395e-05,
+ "loss": 0.801,
+ "step": 791000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1401714885682025e-05,
+ "loss": 0.8218,
+ "step": 792000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1296820975382121e-05,
+ "loss": 0.7743,
+ "step": 793000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1192454575710875e-05,
+ "loss": 0.7675,
+ "step": 794000
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 1.1088407886452029e-05,
+ "loss": 0.9008,
+ "step": 795000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 1.0984890136358416e-05,
+ "loss": 0.8879,
+ "step": 796000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 1.0881695214929688e-05,
+ "loss": 0.9673,
+ "step": 797000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 1.0778928085014794e-05,
+ "loss": 1.0564,
+ "step": 798000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 1.0676691994057019e-05,
+ "loss": 1.0726,
+ "step": 799000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 1.0574783383421865e-05,
+ "loss": 0.9626,
+ "step": 800000
+ },
+ {
+ "epoch": 1.19,
+ "eval_accuracy": 0.8352768932764173,
+ "eval_loss": 0.7242327928543091,
+ "eval_runtime": 12.0402,
+ "eval_samples_per_second": 415.274,
+ "eval_steps_per_second": 0.831,
+ "step": 800000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 5.053617476072374e+19,
+ "total_flos": 5.390525448188854e+19,
  "trial_name": null,
  "trial_params": null
  }
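The trainer_state.json diff above appends the training log entries for steps 751,000 through 800,000 (loss and decaying learning rate every 1,000 steps), adds the evaluation record at step 800,000, and bumps epoch, global_step, and total_flos in the surrounding fields. A short script like the following can pull those new entries back out for inspection; it assumes the entries live under the usual log_history list that the Hugging Face Trainer writes to this file, which the truncated diff does not show explicitly.

```python
import json

# Load the checkpoint's trainer state (path as committed in this repo).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Assumption: the entries shown in the diff sit in the standard "log_history" list.
new_entries = [
    e for e in state.get("log_history", [])
    if 751_000 <= e.get("step", 0) <= 800_000
]

train_logs = [e for e in new_entries if "loss" in e]
eval_logs = [e for e in new_entries if "eval_loss" in e]

if train_logs:
    avg_loss = sum(e["loss"] for e in train_logs) / len(train_logs)
    print(f"train entries: {len(train_logs)}, mean loss: {avg_loss:.4f}")
if eval_logs:
    last_eval = eval_logs[-1]
    print(f"eval at step {last_eval['step']}: "
          f"loss={last_eval['eval_loss']:.4f}, "
          f"accuracy={last_eval['eval_accuracy']:.4f}")
```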
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f7d237efb7293fe26e971dbe427368c6f6fbdef8f5d21e25a2265500a4e6fa4
+ oid sha256:9308899e7cf9b42f3d67f27af3fc47d5047d1474ee940ca97311078cf54325b8
  size 442678571
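Both the checkpoint copy and the top-level pytorch_model.bin point at the same new blob (identical oid and size), i.e. the exported weights at step 800,000. Once the LFS blob is resolved, one quick sanity check is to load the raw state dict and count its parameters. This sketch is illustrative only and does not assume any particular model class or architecture.

```python
import torch

# Load the raw state dict on CPU; the file is the resolved LFS blob, not the pointer.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")

n_tensors = len(state_dict)
n_params = sum(t.numel() for t in state_dict.values())
print(f"{n_tensors} tensors, {n_params / 1e6:.1f}M parameters")

# Peek at a few keys to confirm the expected layer naming.
for key in list(state_dict)[:5]:
    print(key, tuple(state_dict[key].shape))
```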