AlekseyKorshuk committed on
Commit
9205123
1 Parent(s): 5b72dfd
Files changed (6) hide show
  1. config.json +1 -1
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +205 -3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "headie-one",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "huggingartists/headie-one",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf0113bc79048fa4f8214311ab56f9b4f73fd88207147dd999b498f940e9b2c
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7491b6f16bd0a1d8e4cb329eaef6bf7bd58975ad38795c84abed5455f08eca81
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c2e46911e2b964ef6bfca87fc55d66441ba1b1e08e3ed511a06da4af21f2ff2
3
  size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97acf1b0eb486b438bdd175b53cfcfd5b7761ae16a12d32f98ac946a816b0cbe
3
  size 510396521
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1e26cfdb1fd28906d45a5f87757e190292be8bc12770ec4ca25406b63980b90
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8420f7ec058563a1409e89652133384b907f8881b615732be2215b79b14c7ca6
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0accbb6af875cc1fdea78a273b74af30a4276611bc4487469c70b401bdb5d69c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4d0b85035a9999c17bb635a766784cff44d93cc2adedcb471b74f0a33c5e80a
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 5.471560001373291,
3
  "best_model_checkpoint": "output/headie-one/checkpoint-76",
4
- "epoch": 48.0,
5
- "global_step": 3648,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4764,11 +4764,213 @@
4764
  "eval_samples_per_second": 42.777,
4765
  "eval_steps_per_second": 5.452,
4766
  "step": 3648
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4767
  }
4768
  ],
4769
  "max_steps": 3800,
4770
  "num_train_epochs": 50,
4771
- "total_flos": 3768876269568000.0,
4772
  "trial_name": null,
4773
  "trial_params": null
4774
  }
 
1
  {
2
  "best_metric": 5.471560001373291,
3
  "best_model_checkpoint": "output/headie-one/checkpoint-76",
4
+ "epoch": 50.0,
5
+ "global_step": 3800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4764
  "eval_samples_per_second": 42.777,
4765
  "eval_steps_per_second": 5.452,
4766
  "step": 3648
4767
+ },
4768
+ {
4769
+ "epoch": 48.03,
4770
+ "learning_rate": 0.00013696569622025762,
4771
+ "loss": 0.043,
4772
+ "step": 3650
4773
+ },
4774
+ {
4775
+ "epoch": 48.09,
4776
+ "learning_rate": 0.00013434812529663595,
4777
+ "loss": 0.0509,
4778
+ "step": 3655
4779
+ },
4780
+ {
4781
+ "epoch": 48.16,
4782
+ "learning_rate": 0.00012893189933276593,
4783
+ "loss": 0.0521,
4784
+ "step": 3660
4785
+ },
4786
+ {
4787
+ "epoch": 48.22,
4788
+ "learning_rate": 0.00012094756707850676,
4789
+ "loss": 0.051,
4790
+ "step": 3665
4791
+ },
4792
+ {
4793
+ "epoch": 48.29,
4794
+ "learning_rate": 0.00011073499209051154,
4795
+ "loss": 0.0583,
4796
+ "step": 3670
4797
+ },
4798
+ {
4799
+ "epoch": 48.36,
4800
+ "learning_rate": 9.872888599492854e-05,
4801
+ "loss": 0.0547,
4802
+ "step": 3675
4803
+ },
4804
+ {
4805
+ "epoch": 48.42,
4806
+ "learning_rate": 8.54403044178599e-05,
4807
+ "loss": 0.0645,
4808
+ "step": 3680
4809
+ },
4810
+ {
4811
+ "epoch": 48.49,
4812
+ "learning_rate": 7.143489323346813e-05,
4813
+ "loss": 0.0598,
4814
+ "step": 3685
4815
+ },
4816
+ {
4817
+ "epoch": 48.55,
4818
+ "learning_rate": 5.7308811106741546e-05,
4819
+ "loss": 0.0511,
4820
+ "step": 3690
4821
+ },
4822
+ {
4823
+ "epoch": 48.62,
4824
+ "learning_rate": 4.366335321932151e-05,
4825
+ "loss": 0.051,
4826
+ "step": 3695
4827
+ },
4828
+ {
4829
+ "epoch": 48.68,
4830
+ "learning_rate": 3.107935635280202e-05,
4831
+ "loss": 0.0494,
4832
+ "step": 3700
4833
+ },
4834
+ {
4835
+ "epoch": 48.75,
4836
+ "learning_rate": 2.0092474810603514e-05,
4837
+ "loss": 0.047,
4838
+ "step": 3705
4839
+ },
4840
+ {
4841
+ "epoch": 48.82,
4842
+ "learning_rate": 1.1170379591190191e-05,
4843
+ "loss": 0.0484,
4844
+ "step": 3710
4845
+ },
4846
+ {
4847
+ "epoch": 48.88,
4848
+ "learning_rate": 4.69285135968287e-06,
4849
+ "loss": 0.0499,
4850
+ "step": 3715
4851
+ },
4852
+ {
4853
+ "epoch": 48.95,
4854
+ "learning_rate": 9.356145865732563e-07,
4855
+ "loss": 0.0526,
4856
+ "step": 3720
4857
+ },
4858
+ {
4859
+ "epoch": 49.0,
4860
+ "eval_loss": 6.790609836578369,
4861
+ "eval_runtime": 2.386,
4862
+ "eval_samples_per_second": 42.749,
4863
+ "eval_steps_per_second": 5.448,
4864
+ "step": 3724
4865
+ },
4866
+ {
4867
+ "epoch": 49.01,
4868
+ "learning_rate": 5.8600974631134825e-08,
4869
+ "loss": 0.0424,
4870
+ "step": 3725
4871
+ },
4872
+ {
4873
+ "epoch": 49.08,
4874
+ "learning_rate": 2.0991417565617385e-06,
4875
+ "loss": 0.0426,
4876
+ "step": 3730
4877
+ },
4878
+ {
4879
+ "epoch": 49.14,
4880
+ "learning_rate": 6.970378642209748e-06,
4881
+ "loss": 0.0428,
4882
+ "step": 3735
4883
+ },
4884
+ {
4885
+ "epoch": 49.21,
4886
+ "learning_rate": 1.446496105540648e-05,
4887
+ "loss": 0.0409,
4888
+ "step": 3740
4889
+ },
4890
+ {
4891
+ "epoch": 49.28,
4892
+ "learning_rate": 2.4263872282739523e-05,
4893
+ "loss": 0.0452,
4894
+ "step": 3745
4895
+ },
4896
+ {
4897
+ "epoch": 49.34,
4898
+ "learning_rate": 3.595000883765652e-05,
4899
+ "loss": 0.0406,
4900
+ "step": 3750
4901
+ },
4902
+ {
4903
+ "epoch": 49.41,
4904
+ "learning_rate": 4.902593501651078e-05,
4905
+ "loss": 0.0424,
4906
+ "step": 3755
4907
+ },
4908
+ {
4909
+ "epoch": 49.47,
4910
+ "learning_rate": 6.293505690059702e-05,
4911
+ "loss": 0.0368,
4912
+ "step": 3760
4913
+ },
4914
+ {
4915
+ "epoch": 49.54,
4916
+ "learning_rate": 7.708531450507785e-05,
4917
+ "loss": 0.0432,
4918
+ "step": 3765
4919
+ },
4920
+ {
4921
+ "epoch": 49.61,
4922
+ "learning_rate": 9.087438358744153e-05,
4923
+ "loss": 0.0429,
4924
+ "step": 3770
4925
+ },
4926
+ {
4927
+ "epoch": 49.67,
4928
+ "learning_rate": 0.00010371531436754655,
4929
+ "loss": 0.0429,
4930
+ "step": 3775
4931
+ },
4932
+ {
4933
+ "epoch": 49.74,
4934
+ "learning_rate": 0.00011506151581352551,
4935
+ "loss": 0.0473,
4936
+ "step": 3780
4937
+ },
4938
+ {
4939
+ "epoch": 49.8,
4940
+ "learning_rate": 0.0001244300220047349,
4941
+ "loss": 0.041,
4942
+ "step": 3785
4943
+ },
4944
+ {
4945
+ "epoch": 49.87,
4946
+ "learning_rate": 0.00013142205020853727,
4947
+ "loss": 0.051,
4948
+ "step": 3790
4949
+ },
4950
+ {
4951
+ "epoch": 49.93,
4952
+ "learning_rate": 0.0001357399755894562,
4953
+ "loss": 0.0513,
4954
+ "step": 3795
4955
+ },
4956
+ {
4957
+ "epoch": 50.0,
4958
+ "learning_rate": 0.0001372,
4959
+ "loss": 0.053,
4960
+ "step": 3800
4961
+ },
4962
+ {
4963
+ "epoch": 50.0,
4964
+ "eval_loss": 6.893409252166748,
4965
+ "eval_runtime": 2.3737,
4966
+ "eval_samples_per_second": 42.971,
4967
+ "eval_steps_per_second": 5.477,
4968
+ "step": 3800
4969
  }
4970
  ],
4971
  "max_steps": 3800,
4972
  "num_train_epochs": 50,
4973
+ "total_flos": 3925912780800000.0,
4974
  "trial_name": null,
4975
  "trial_params": null
4976
  }