AlekseyKorshuk committed
Commit b2b0ec7
1 Parent(s): 9205123

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/headie-one")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/x7sbsok3/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3fzj7qkl/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/23dok566) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/23dok566/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9/artifacts) is logged and versioned.
 
 ## How to use
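The hunk stops at the "How to use" heading, so the README's own usage snippet is not part of this diff. As a minimal sketch (not necessarily the README's exact code), the fine-tuned model can be loaded from the Hub with the transformers text-generation pipeline; the prompt below is made up:

```python
from transformers import pipeline

# Load the GPT-2 checkpoint fine-tuned on Headie One's lyrics from the Hub.
generator = pipeline("text-generation", model="huggingartists/headie-one")

# Hypothetical prompt; sampling settings are left at the pipeline defaults.
print(generator("I have a dream", num_return_sequences=1)[0]["generated_text"])
```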
 
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "huggingartists/headie-one",
+  "_name_or_path": "headie-one",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -36,7 +36,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.20.0",
+  "transformers_version": "4.20.1",
   "use_cache": true,
   "vocab_size": 50257
 }
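Only "_name_or_path" and "transformers_version" change in this file. A small sketch for inspecting the published config from the Hub; note that from_pretrained fills "_name_or_path" with whatever id or path you pass it, so that particular field is best read from the raw config.json:

```python
from transformers import AutoConfig

# Fetch the model config from the Hub and check fields that are stable across this commit.
config = AutoConfig.from_pretrained("huggingartists/headie-one")
print(config.architectures)  # ['GPT2LMHeadModel']
print(config.vocab_size)     # 50257
```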
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 1.5699154138565063, "eval_runtime": 1.6396, "eval_samples_per_second": 40.864, "eval_steps_per_second": 5.489, "epoch": 19.0}
+{"eval_loss": 0.8823016285896301, "eval_runtime": 2.4753, "eval_samples_per_second": 42.419, "eval_steps_per_second": 5.656, "epoch": 70.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f961cbbd78988f8623923dfc3e5aedc36596ae0f89789aa50a153214ef47c9ba
+oid sha256:e97c2b39904c2125990372bcd2c0af3ae009d0f37cd2567d4ed60579e6209d63
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7491b6f16bd0a1d8e4cb329eaef6bf7bd58975ad38795c84abed5455f08eca81
+oid sha256:7f62d82420e69209e064b44c6ac85956e0739f6dc942fcf17f0200b7f3f0b24c
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97acf1b0eb486b438bdd175b53cfcfd5b7761ae16a12d32f98ac946a816b0cbe
+oid sha256:f1313c47fb885aa521ff58af331fb5d870ec7e29197ced87841b3265ac00a9ea
 size 510396521
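The weights themselves live in Git LFS, so the diff only shows the pointer's new sha256 (the size is unchanged). A minimal sketch for verifying a locally downloaded copy of pytorch_model.bin against the new pointer; the local path is hypothetical:

```python
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks so the ~510 MB checkpoint never sits in memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Compare against the oid recorded in the new LFS pointer above.
expected = "f1313c47fb885aa521ff58af331fb5d870ec7e29197ced87841b3265ac00a9ea"
print(sha256_of("pytorch_model.bin") == expected)
```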
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8420f7ec058563a1409e89652133384b907f8881b615732be2215b79b14c7ca6
+oid sha256:edd9978b73bc8d959cf1091d4e60a8b7eea3426d8eb1a224bb815d849bd77207
 size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4d0b85035a9999c17bb635a766784cff44d93cc2adedcb471b74f0a33c5e80a
+oid sha256:1b1b7cf6b6c584836674e51b2bffb4f225e602280b8da1987fbd27d41d1f5b41
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 5.471560001373291,
-  "best_model_checkpoint": "output/headie-one/checkpoint-76",
-  "epoch": 50.0,
-  "global_step": 3800,
+  "best_metric": 0.8823016285896301,
+  "best_model_checkpoint": "output/headie-one/checkpoint-3900",
+  "epoch": 52.0,
+  "global_step": 3900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4966,11 +4966,147 @@
       "eval_samples_per_second": 42.971,
       "eval_steps_per_second": 5.477,
       "step": 3800
+    },
+    {
+      "epoch": 50.73,
+      "learning_rate": 2.2697640403783063e-05,
+      "loss": 0.9066,
+      "step": 3805
+    },
+    {
+      "epoch": 50.8,
+      "learning_rate": 1.3101434185879145e-05,
+      "loss": 0.6969,
+      "step": 3810
+    },
+    {
+      "epoch": 50.87,
+      "learning_rate": 5.930781605717916e-06,
+      "loss": 1.0504,
+      "step": 3815
+    },
+    {
+      "epoch": 50.93,
+      "learning_rate": 1.4990745896610897e-06,
+      "loss": 1.4716,
+      "step": 3820
+    },
+    {
+      "epoch": 51.0,
+      "learning_rate": 0.0,
+      "loss": 1.2765,
+      "step": 3825
+    },
+    {
+      "epoch": 51.0,
+      "eval_loss": 1.2292253971099854,
+      "eval_runtime": 2.6668,
+      "eval_samples_per_second": 39.373,
+      "eval_steps_per_second": 5.25,
+      "step": 3825
+    },
+    {
+      "epoch": 51.07,
+      "learning_rate": 1.499074589660808e-06,
+      "loss": 1.2797,
+      "step": 3830
+    },
+    {
+      "epoch": 51.13,
+      "learning_rate": 5.9307816057173676e-06,
+      "loss": 1.9281,
+      "step": 3835
+    },
+    {
+      "epoch": 51.2,
+      "learning_rate": 1.3101434185878354e-05,
+      "loss": 1.4825,
+      "step": 3840
+    },
+    {
+      "epoch": 51.27,
+      "learning_rate": 2.2697640403782067e-05,
+      "loss": 1.4677,
+      "step": 3845
+    },
+    {
+      "epoch": 51.33,
+      "learning_rate": 3.429999999999976e-05,
+      "loss": 1.0241,
+      "step": 3850
+    },
+    {
+      "epoch": 51.4,
+      "learning_rate": 4.740143418587843e-05,
+      "loss": 1.0786,
+      "step": 3855
+    },
+    {
+      "epoch": 51.47,
+      "learning_rate": 6.142934741983887e-05,
+      "loss": 1.4638,
+      "step": 3860
+    },
+    {
+      "epoch": 51.53,
+      "learning_rate": 7.5770652580161e-05,
+      "loss": 1.0569,
+      "step": 3865
+    },
+    {
+      "epoch": 51.6,
+      "learning_rate": 8.979856581412144e-05,
+      "loss": 1.0274,
+      "step": 3870
+    },
+    {
+      "epoch": 51.67,
+      "learning_rate": 0.00010289999999999844,
+      "loss": 0.9631,
+      "step": 3875
+    },
+    {
+      "epoch": 51.73,
+      "learning_rate": 0.00011450235959621783,
+      "loss": 0.7978,
+      "step": 3880
+    },
+    {
+      "epoch": 51.8,
+      "learning_rate": 0.00012409856581412041,
+      "loss": 0.838,
+      "step": 3885
+    },
+    {
+      "epoch": 51.87,
+      "learning_rate": 0.00013126921839428258,
+      "loss": 0.6216,
+      "step": 3890
+    },
+    {
+      "epoch": 51.93,
+      "learning_rate": 0.00013570092541033876,
+      "loss": 1.2543,
+      "step": 3895
+    },
+    {
+      "epoch": 52.0,
+      "learning_rate": 0.0001372,
+      "loss": 1.0127,
+      "step": 3900
+    },
+    {
+      "epoch": 52.0,
+      "eval_loss": 0.8823016285896301,
+      "eval_runtime": 2.4876,
+      "eval_samples_per_second": 42.209,
+      "eval_steps_per_second": 5.628,
+      "step": 3900
     }
   ],
-  "max_steps": 3800,
-  "num_train_epochs": 50,
-  "total_flos": 3925912780800000.0,
+  "max_steps": 5250,
+  "num_train_epochs": 70,
+  "total_flos": 4029907009536000.0,
   "trial_name": null,
   "trial_params": null
 }
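The new best checkpoint and the extended schedule are recorded in the trainer state. A small sketch of reading those fields back from a local copy of the file (the path mirrors the "best_model_checkpoint" value above and is hypothetical):

```python
import json

# Load the Trainer's state file and report the checkpoint this commit promotes to "best".
with open("output/headie-one/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.8823016285896301
print(state["best_model_checkpoint"])  # output/headie-one/checkpoint-3900
print(state["epoch"], state["global_step"], state["max_steps"])  # 52.0 3900 5250
```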
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ce9d32bd26e298502c691514e6e62cd03430e7295e1fd7ea31138547385731b
+oid sha256:202761b0e853ed0912633dc56b6e49fb1661b0995bfb2fd6aec7da7426b6f24c
 size 3311