AlekseyKorshuk commited on
Commit
204fd78
1 Parent(s): 36c9461

huggingartists

Browse files
README.md CHANGED
@@ -14,7 +14,7 @@ widget:
14
  <div class="inline-flex flex-col" style="line-height: 1.5;">
15
  <div class="flex">
16
  <div
17
- style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/e525200b65911e414a9d38c348eb1c6b.667x667x1.jpg&#39;)">
18
  </div>
19
  </div>
20
  <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/oxxxymiron")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/296e4zy2/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/lyd324n8) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/lyd324n8/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
14
  <div class="inline-flex flex-col" style="line-height: 1.5;">
15
  <div class="flex">
16
  <div
17
+ style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/57ecbbdaf70c671be2d8b7bd39112db0.1000x1000x1.jpg&#39;)">
18
  </div>
19
  </div>
20
  <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
 
45
  dataset = load_dataset("huggingartists/oxxxymiron")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/35c25tqd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1z3u6lod) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1z3u6lod/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "huggingartists/oxxxymiron",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
 
21
  "resid_pdrop": 0.1,
 
22
  "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.9.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
1
  {
2
+ "_name_or_path": "oxxxymiron",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
21
+ "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
  "scale_attn_weights": true,
25
  "summary_activation": null,
26
  "summary_first_dropout": 0.1,
 
37
  }
38
  },
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.15.0",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
evaluation.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_loss": 1.3626197576522827, "eval_runtime": 22.1222, "eval_samples_per_second": 20.839, "eval_steps_per_second": 2.622, "epoch": 16.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf9a57b36b9276338603832189c301268f56bae13bf00fb4ac5da15b09879d59
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb82b94c7d34decc95ea0c0ed51c0cccd4dc0cf1f0a8925648cdfafaeb1ad6e6
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fcbd0efc1ba3f1cf85a1ad153a13999c85e76f9f2b7c9440f87ec591788eaa1
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb0cfb7ad5f79a151980f8e8fcb534c9dbe0dc5ca44d80132aa39db60c590550
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa19aa16a103bc8d557dbe011a86e4135bafaa6acf98d21f7c8a1f0ef4362155
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94f6e5d2d51fc4962178d6141bcb4e442c8a4b69e91f35e541dc52137ea1701b
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44cd6576738e9780ee196c4c60bbd639b2c9174b22df17c2b94e485513761999
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2072ed4e05f726a0eacce3eb003550040e7f5d45ef05ee6a83533a4c6834a56f
3
  size 14439
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3757bed4046f1195d42cf6b407ad0ea93daf46be4a6bcc490f1daeb7bdf87f8c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b130d72fae4cb24993ff792bae18a70e194b6d74fdcb623b38c9f59180e61d5
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 3885,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4668,11 +4668,325 @@
4668
  "learning_rate": 0.0,
4669
  "loss": 1.42,
4670
  "step": 3885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4671
  }
4672
  ],
4673
- "max_steps": 3885,
4674
- "num_train_epochs": 15,
4675
- "total_flos": 4058518487040000.0,
4676
  "trial_name": null,
4677
  "trial_params": null
4678
  }
 
1
  {
2
+ "best_metric": 1.3626197576522827,
3
+ "best_model_checkpoint": "output/oxxxymiron/checkpoint-4144",
4
+ "epoch": 16.0,
5
+ "global_step": 4144,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4668
  "learning_rate": 0.0,
4669
  "loss": 1.42,
4670
  "step": 3885
4671
+ },
4672
+ {
4673
+ "epoch": 15.02,
4674
+ "learning_rate": 1.261250123775442e-07,
4675
+ "loss": 1.4818,
4676
+ "step": 3890
4677
+ },
4678
+ {
4679
+ "epoch": 15.04,
4680
+ "learning_rate": 5.040362734534007e-07,
4681
+ "loss": 1.4719,
4682
+ "step": 3895
4683
+ },
4684
+ {
4685
+ "epoch": 15.06,
4686
+ "learning_rate": 1.132344160414715e-06,
4687
+ "loss": 1.3957,
4688
+ "step": 3900
4689
+ },
4690
+ {
4691
+ "epoch": 15.08,
4692
+ "learning_rate": 2.008738313494259e-06,
4693
+ "loss": 1.4262,
4694
+ "step": 3905
4695
+ },
4696
+ {
4697
+ "epoch": 15.1,
4698
+ "learning_rate": 3.1299961314263817e-06,
4699
+ "loss": 1.3789,
4700
+ "step": 3910
4701
+ },
4702
+ {
4703
+ "epoch": 15.12,
4704
+ "learning_rate": 4.491994621320232e-06,
4705
+ "loss": 1.4566,
4706
+ "step": 3915
4707
+ },
4708
+ {
4709
+ "epoch": 15.14,
4710
+ "learning_rate": 6.089725559373869e-06,
4711
+ "loss": 1.4144,
4712
+ "step": 3920
4713
+ },
4714
+ {
4715
+ "epoch": 15.15,
4716
+ "learning_rate": 7.91731390668544e-06,
4717
+ "loss": 1.4525,
4718
+ "step": 3925
4719
+ },
4720
+ {
4721
+ "epoch": 15.17,
4722
+ "learning_rate": 9.968039412440788e-06,
4723
+ "loss": 1.4557,
4724
+ "step": 3930
4725
+ },
4726
+ {
4727
+ "epoch": 15.19,
4728
+ "learning_rate": 1.2234361325042642e-05,
4729
+ "loss": 1.4918,
4730
+ "step": 3935
4731
+ },
4732
+ {
4733
+ "epoch": 15.21,
4734
+ "learning_rate": 1.4707946120313543e-05,
4735
+ "loss": 1.5974,
4736
+ "step": 3940
4737
+ },
4738
+ {
4739
+ "epoch": 15.23,
4740
+ "learning_rate": 1.7379698144815265e-05,
4741
+ "loss": 1.3931,
4742
+ "step": 3945
4743
+ },
4744
+ {
4745
+ "epoch": 15.25,
4746
+ "learning_rate": 2.0239793061604638e-05,
4747
+ "loss": 1.3826,
4748
+ "step": 3950
4749
+ },
4750
+ {
4751
+ "epoch": 15.27,
4752
+ "learning_rate": 2.3277713975440236e-05,
4753
+ "loss": 1.4445,
4754
+ "step": 3955
4755
+ },
4756
+ {
4757
+ "epoch": 15.29,
4758
+ "learning_rate": 2.6482290104606033e-05,
4759
+ "loss": 1.4149,
4760
+ "step": 3960
4761
+ },
4762
+ {
4763
+ "epoch": 15.31,
4764
+ "learning_rate": 2.984173785715038e-05,
4765
+ "loss": 1.3804,
4766
+ "step": 3965
4767
+ },
4768
+ {
4769
+ "epoch": 15.33,
4770
+ "learning_rate": 3.334370416049605e-05,
4771
+ "loss": 1.3937,
4772
+ "step": 3970
4773
+ },
4774
+ {
4775
+ "epoch": 15.35,
4776
+ "learning_rate": 3.697531188509998e-05,
4777
+ "loss": 1.4221,
4778
+ "step": 3975
4779
+ },
4780
+ {
4781
+ "epoch": 15.37,
4782
+ "learning_rate": 4.072320719512414e-05,
4783
+ "loss": 1.4485,
4784
+ "step": 3980
4785
+ },
4786
+ {
4787
+ "epoch": 15.39,
4788
+ "learning_rate": 4.4573608652015956e-05,
4789
+ "loss": 1.5171,
4790
+ "step": 3985
4791
+ },
4792
+ {
4793
+ "epoch": 15.41,
4794
+ "learning_rate": 4.851235789042871e-05,
4795
+ "loss": 1.4849,
4796
+ "step": 3990
4797
+ },
4798
+ {
4799
+ "epoch": 15.42,
4800
+ "learning_rate": 5.2524971680144367e-05,
4801
+ "loss": 1.4614,
4802
+ "step": 3995
4803
+ },
4804
+ {
4805
+ "epoch": 15.44,
4806
+ "learning_rate": 5.659669518256589e-05,
4807
+ "loss": 1.413,
4808
+ "step": 4000
4809
+ },
4810
+ {
4811
+ "epoch": 15.46,
4812
+ "learning_rate": 6.071255620594038e-05,
4813
+ "loss": 1.4743,
4814
+ "step": 4005
4815
+ },
4816
+ {
4817
+ "epoch": 15.48,
4818
+ "learning_rate": 6.485742025981448e-05,
4819
+ "loss": 1.4832,
4820
+ "step": 4010
4821
+ },
4822
+ {
4823
+ "epoch": 15.5,
4824
+ "learning_rate": 6.901604620628492e-05,
4825
+ "loss": 1.5146,
4826
+ "step": 4015
4827
+ },
4828
+ {
4829
+ "epoch": 15.52,
4830
+ "learning_rate": 7.317314230339967e-05,
4831
+ "loss": 1.5513,
4832
+ "step": 4020
4833
+ },
4834
+ {
4835
+ "epoch": 15.54,
4836
+ "learning_rate": 7.731342243463577e-05,
4837
+ "loss": 1.5379,
4838
+ "step": 4025
4839
+ },
4840
+ {
4841
+ "epoch": 15.56,
4842
+ "learning_rate": 8.142166231769639e-05,
4843
+ "loss": 1.4753,
4844
+ "step": 4030
4845
+ },
4846
+ {
4847
+ "epoch": 15.58,
4848
+ "learning_rate": 8.548275548593135e-05,
4849
+ "loss": 1.5384,
4850
+ "step": 4035
4851
+ },
4852
+ {
4853
+ "epoch": 15.6,
4854
+ "learning_rate": 8.948176883653908e-05,
4855
+ "loss": 1.5967,
4856
+ "step": 4040
4857
+ },
4858
+ {
4859
+ "epoch": 15.62,
4860
+ "learning_rate": 9.340399754128752e-05,
4861
+ "loss": 1.4906,
4862
+ "step": 4045
4863
+ },
4864
+ {
4865
+ "epoch": 15.64,
4866
+ "learning_rate": 9.723501911784575e-05,
4867
+ "loss": 1.5322,
4868
+ "step": 4050
4869
+ },
4870
+ {
4871
+ "epoch": 15.66,
4872
+ "learning_rate": 0.0001009607464628976,
4873
+ "loss": 1.5223,
4874
+ "step": 4055
4875
+ },
4876
+ {
4877
+ "epoch": 15.68,
4878
+ "learning_rate": 0.00010456747965202585,
4879
+ "loss": 1.4992,
4880
+ "step": 4060
4881
+ },
4882
+ {
4883
+ "epoch": 15.69,
4884
+ "learning_rate": 0.00010804195631589752,
4885
+ "loss": 1.5217,
4886
+ "step": 4065
4887
+ },
4888
+ {
4889
+ "epoch": 15.71,
4890
+ "learning_rate": 0.00011137140040750902,
4891
+ "loss": 1.4526,
4892
+ "step": 4070
4893
+ },
4894
+ {
4895
+ "epoch": 15.73,
4896
+ "learning_rate": 0.00011454356918116707,
4897
+ "loss": 1.5553,
4898
+ "step": 4075
4899
+ },
4900
+ {
4901
+ "epoch": 15.75,
4902
+ "learning_rate": 0.00011754679821046198,
4903
+ "loss": 1.4297,
4904
+ "step": 4080
4905
+ },
4906
+ {
4907
+ "epoch": 15.77,
4908
+ "learning_rate": 0.00012037004427969463,
4909
+ "loss": 1.4843,
4910
+ "step": 4085
4911
+ },
4912
+ {
4913
+ "epoch": 15.79,
4914
+ "learning_rate": 0.00012300292599103915,
4915
+ "loss": 1.4636,
4916
+ "step": 4090
4917
+ },
4918
+ {
4919
+ "epoch": 15.81,
4920
+ "learning_rate": 0.00012543576193812758,
4921
+ "loss": 1.4776,
4922
+ "step": 4095
4923
+ },
4924
+ {
4925
+ "epoch": 15.83,
4926
+ "learning_rate": 0.00012765960630568412,
4927
+ "loss": 1.514,
4928
+ "step": 4100
4929
+ },
4930
+ {
4931
+ "epoch": 15.85,
4932
+ "learning_rate": 0.00012966628176431014,
4933
+ "loss": 1.4759,
4934
+ "step": 4105
4935
+ },
4936
+ {
4937
+ "epoch": 15.87,
4938
+ "learning_rate": 0.00013144840953945605,
4939
+ "loss": 1.396,
4940
+ "step": 4110
4941
+ },
4942
+ {
4943
+ "epoch": 15.89,
4944
+ "learning_rate": 0.00013299943654401656,
4945
+ "loss": 1.5696,
4946
+ "step": 4115
4947
+ },
4948
+ {
4949
+ "epoch": 15.91,
4950
+ "learning_rate": 0.0001343136594747805,
4951
+ "loss": 1.5059,
4952
+ "step": 4120
4953
+ },
4954
+ {
4955
+ "epoch": 15.93,
4956
+ "learning_rate": 0.0001353862457841268,
4957
+ "loss": 1.4919,
4958
+ "step": 4125
4959
+ },
4960
+ {
4961
+ "epoch": 15.95,
4962
+ "learning_rate": 0.00013621325144985277,
4963
+ "loss": 1.515,
4964
+ "step": 4130
4965
+ },
4966
+ {
4967
+ "epoch": 15.97,
4968
+ "learning_rate": 0.00013679163547779453,
4969
+ "loss": 1.4448,
4970
+ "step": 4135
4971
+ },
4972
+ {
4973
+ "epoch": 15.98,
4974
+ "learning_rate": 0.00013711927108390882,
4975
+ "loss": 1.5143,
4976
+ "step": 4140
4977
+ },
4978
+ {
4979
+ "epoch": 16.0,
4980
+ "eval_loss": 1.3626197576522827,
4981
+ "eval_runtime": 22.0604,
4982
+ "eval_samples_per_second": 20.897,
4983
+ "eval_steps_per_second": 2.629,
4984
+ "step": 4144
4985
  }
4986
  ],
4987
+ "max_steps": 4144,
4988
+ "num_train_epochs": 16,
4989
+ "total_flos": 4329217032192000.0,
4990
  "trial_name": null,
4991
  "trial_params": null
4992
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c8781197db3a3403466497c5da7316ba5318202ed62c8d7147bd3a02f7bd353
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa22e711f18ef9c494e076522c9fc6c23c5e5db9e44a9588da998b4d53d50ce7
3
+ size 2991