AlekseyKorshuk commited on
Commit
bfe45a9
1 Parent(s): bf33705

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/death-grips")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1z4yxb78/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Death Grips's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/18v4x337) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/18v4x337/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/death-grips")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2hmeenl7/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Death Grips's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/226ak5bw) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/226ak5bw/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "huggingartists/death-grips",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -17,7 +17,9 @@
17
  "n_inner": null,
18
  "n_layer": 12,
19
  "n_positions": 1024,
 
20
  "resid_pdrop": 0.1,
 
21
  "scale_attn_weights": true,
22
  "summary_activation": null,
23
  "summary_first_dropout": 0.1,
@@ -34,7 +36,7 @@
34
  }
35
  },
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.11.3",
38
  "use_cache": true,
39
  "vocab_size": 50257
40
  }
1
  {
2
+ "_name_or_path": "death-grips",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
17
  "n_inner": null,
18
  "n_layer": 12,
19
  "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
  "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
  "scale_attn_weights": true,
24
  "summary_activation": null,
25
  "summary_first_dropout": 0.1,
36
  }
37
  },
38
  "torch_dtype": "float32",
39
+ "transformers_version": "4.16.2",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 3.1992340087890625, "eval_runtime": 0.5011, "eval_samples_per_second": 75.835, "eval_steps_per_second": 9.978, "epoch": 61.0}
1
+ {"eval_loss": 3.3310177326202393, "eval_runtime": 0.4983, "eval_samples_per_second": 74.249, "eval_steps_per_second": 10.034, "epoch": 5.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93ea9acbaf03e5cd973f67cc943630d2e10e1ec2156b1a368c852eef0b80c447
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ad0b6c7d0a03b637b1ac65ce7a101961bf799cbb7df8c58e8abe5b67edfe54
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91885fb2d0a23add8ab65706c01695bf0d25c6bdb6fa85982f61c7a4403d7562
3
- size 995603825
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a89cc3dc4bc5bf5749225580f6d1a68f68cb3efcf28ac360ebc78691d00b0d4
3
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45a4d5fb1bb4bc74f1d211806c05b5291969db23c26535e118f8d45dd0493b88
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769a68d1b17288adfefbbdfc899d1c4145d9b39899d6716aa50dc0f324d71194
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80bc2e05cd33aba954c4fa5280f46e5f77924d62e7982e05ecf36aff4194107e
3
- size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bc5aa03ef94446dcbc452e8552895048e148b2563e9a24308c35cffc6028de6
3
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2222362d80f6c4a5547283104536d5c4564191fb641a9b7f7b9e346e9a58c2ee
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aadcbf640a1905dd2dacb92f6567732433456c8a12337128489f033e5db42b7
3
  size 623
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
trainer_state.json CHANGED
@@ -1,118 +1,254 @@
1
  {
2
- "best_metric": 3.1992340087890625,
3
- "best_model_checkpoint": "output/death-grips/checkpoint-66",
4
- "epoch": 2.0,
5
- "global_step": 66,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.16,
12
- "learning_rate": 0.00012909979873429716,
13
- "loss": 3.5364,
14
  "step": 5
15
  },
16
  {
17
- "epoch": 0.31,
18
- "learning_rate": 0.00010671211798514472,
19
- "loss": 3.4591,
20
  "step": 10
21
  },
22
  {
23
- "epoch": 0.47,
24
- "learning_rate": 7.532397582660788e-05,
25
- "loss": 3.5388,
26
  "step": 15
27
  },
28
  {
29
- "epoch": 0.62,
30
- "learning_rate": 4.2347916539754844e-05,
31
- "loss": 3.3558,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 0.78,
36
- "learning_rate": 1.557148289931624e-05,
37
- "loss": 3.3211,
38
  "step": 25
39
  },
40
  {
41
- "epoch": 0.94,
42
- "learning_rate": 1.3181297643383925e-06,
43
- "loss": 3.2263,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 1.0,
48
- "eval_loss": 3.4941582679748535,
49
- "eval_runtime": 0.5841,
50
- "eval_samples_per_second": 75.33,
51
- "eval_steps_per_second": 10.272,
52
- "step": 32
53
- },
54
- {
55
- "epoch": 1.0,
56
- "eval_loss": 3.25174617767334,
57
- "eval_runtime": 0.4993,
58
- "eval_samples_per_second": 76.107,
59
- "eval_steps_per_second": 10.014,
60
  "step": 33
61
  },
62
  {
63
  "epoch": 1.06,
64
- "learning_rate": 1.2396913677783124e-06,
65
- "loss": 3.1933,
66
  "step": 35
67
  },
68
  {
69
  "epoch": 1.21,
70
- "learning_rate": 1.4676757700644785e-05,
71
- "loss": 3.1172,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 1.36,
76
- "learning_rate": 4.0102530108070535e-05,
77
- "loss": 3.4529,
78
  "step": 45
79
  },
80
  {
81
  "epoch": 1.52,
82
- "learning_rate": 7.186411942550872e-05,
83
- "loss": 3.2121,
84
  "step": 50
85
  },
86
  {
87
  "epoch": 1.67,
88
- "learning_rate": 0.00010290000000000001,
89
- "loss": 3.1379,
90
  "step": 55
91
  },
92
  {
93
  "epoch": 1.82,
94
- "learning_rate": 0.000126309992352219,
95
- "loss": 3.1008,
96
  "step": 60
97
  },
98
  {
99
  "epoch": 1.97,
100
- "learning_rate": 0.0001368893738885136,
101
- "loss": 3.1177,
102
  "step": 65
103
  },
104
  {
105
  "epoch": 2.0,
106
- "eval_loss": 3.1992340087890625,
107
- "eval_runtime": 0.4988,
108
- "eval_samples_per_second": 76.185,
109
- "eval_steps_per_second": 10.024,
110
  "step": 66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  }
112
  ],
113
- "max_steps": 2013,
114
- "num_train_epochs": 61,
115
- "total_flos": 66890760192000.0,
116
  "trial_name": null,
117
  "trial_params": null
118
  }
1
  {
2
+ "best_metric": 3.3310177326202393,
3
+ "best_model_checkpoint": "output/death-grips/checkpoint-165",
4
+ "epoch": 5.0,
5
+ "global_step": 165,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.15,
12
+ "learning_rate": 7.625888222272212e-06,
13
+ "loss": 2.8634,
14
  "step": 5
15
  },
16
  {
17
+ "epoch": 0.3,
18
+ "learning_rate": 2.8808096003415784e-05,
19
+ "loss": 2.5791,
20
  "step": 10
21
  },
22
  {
23
+ "epoch": 0.45,
24
+ "learning_rate": 5.8837202094452676e-05,
25
+ "loss": 2.7462,
26
  "step": 15
27
  },
28
  {
29
+ "epoch": 0.61,
30
+ "learning_rate": 9.103686228357511e-05,
31
+ "loss": 2.8049,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 0.76,
36
+ "learning_rate": 0.00011824815501400784,
37
+ "loss": 2.9692,
38
  "step": 25
39
  },
40
  {
41
+ "epoch": 0.91,
42
+ "learning_rate": 0.0001344212179899545,
43
+ "loss": 2.6618,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 1.0,
48
+ "eval_loss": 3.3408315181732178,
49
+ "eval_runtime": 0.4984,
50
+ "eval_samples_per_second": 74.235,
51
+ "eval_steps_per_second": 10.032,
 
 
 
 
 
 
 
 
52
  "step": 33
53
  },
54
  {
55
  "epoch": 1.06,
56
+ "learning_rate": 0.00013596030863222171,
57
+ "loss": 2.7889,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 1.21,
62
+ "learning_rate": 0.00012252324229935523,
63
+ "loss": 2.8071,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 1.36,
68
+ "learning_rate": 9.709746989192948e-05,
69
+ "loss": 2.7665,
70
  "step": 45
71
  },
72
  {
73
  "epoch": 1.52,
74
+ "learning_rate": 6.533588057449117e-05,
75
+ "loss": 2.4805,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 1.67,
80
+ "learning_rate": 3.429999999999996e-05,
81
+ "loss": 2.509,
82
  "step": 55
83
  },
84
  {
85
  "epoch": 1.82,
86
+ "learning_rate": 1.0890007647780984e-05,
87
+ "loss": 2.5541,
88
  "step": 60
89
  },
90
  {
91
  "epoch": 1.97,
92
+ "learning_rate": 3.106261114864048e-07,
93
+ "loss": 2.4372,
94
  "step": 65
95
  },
96
  {
97
  "epoch": 2.0,
98
+ "eval_loss": 3.340346097946167,
99
+ "eval_runtime": 0.4997,
100
+ "eval_samples_per_second": 74.05,
101
+ "eval_steps_per_second": 10.007,
102
  "step": 66
103
+ },
104
+ {
105
+ "epoch": 2.12,
106
+ "learning_rate": 4.913959795097369e-06,
107
+ "loss": 2.7443,
108
+ "step": 70
109
+ },
110
+ {
111
+ "epoch": 2.27,
112
+ "learning_rate": 2.3676553651353298e-05,
113
+ "loss": 2.3401,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 2.42,
118
+ "learning_rate": 5.242693702405328e-05,
119
+ "loss": 2.6265,
120
+ "step": 80
121
+ },
122
+ {
123
+ "epoch": 2.58,
124
+ "learning_rate": 8.477306297594662e-05,
125
+ "loss": 2.5077,
126
+ "step": 85
127
+ },
128
+ {
129
+ "epoch": 2.73,
130
+ "learning_rate": 0.00011352344634864663,
131
+ "loss": 2.2708,
132
+ "step": 90
133
+ },
134
+ {
135
+ "epoch": 2.88,
136
+ "learning_rate": 0.0001322860402049026,
137
+ "loss": 2.6378,
138
+ "step": 95
139
+ },
140
+ {
141
+ "epoch": 3.0,
142
+ "eval_loss": 3.3379859924316406,
143
+ "eval_runtime": 0.5006,
144
+ "eval_samples_per_second": 73.905,
145
+ "eval_steps_per_second": 9.987,
146
+ "step": 99
147
+ },
148
+ {
149
+ "epoch": 3.03,
150
+ "learning_rate": 0.0001368893738885136,
151
+ "loss": 2.4859,
152
+ "step": 100
153
+ },
154
+ {
155
+ "epoch": 3.18,
156
+ "learning_rate": 0.00012630999235221907,
157
+ "loss": 2.3826,
158
+ "step": 105
159
+ },
160
+ {
161
+ "epoch": 3.33,
162
+ "learning_rate": 0.00010290000000000012,
163
+ "loss": 2.3622,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 3.48,
168
+ "learning_rate": 7.186411942550891e-05,
169
+ "loss": 2.3022,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 3.64,
174
+ "learning_rate": 4.0102530108070596e-05,
175
+ "loss": 2.3944,
176
+ "step": 120
177
+ },
178
+ {
179
+ "epoch": 3.79,
180
+ "learning_rate": 1.4676757700644823e-05,
181
+ "loss": 2.3517,
182
+ "step": 125
183
+ },
184
+ {
185
+ "epoch": 3.94,
186
+ "learning_rate": 1.2396913677783427e-06,
187
+ "loss": 2.2771,
188
+ "step": 130
189
+ },
190
+ {
191
+ "epoch": 4.0,
192
+ "eval_loss": 3.3588767051696777,
193
+ "eval_runtime": 0.497,
194
+ "eval_samples_per_second": 74.445,
195
+ "eval_steps_per_second": 10.06,
196
+ "step": 132
197
+ },
198
+ {
199
+ "epoch": 4.09,
200
+ "learning_rate": 2.7787820100454274e-06,
201
+ "loss": 2.2086,
202
+ "step": 135
203
+ },
204
+ {
205
+ "epoch": 4.24,
206
+ "learning_rate": 1.8951844985992186e-05,
207
+ "loss": 2.3738,
208
+ "step": 140
209
+ },
210
+ {
211
+ "epoch": 4.39,
212
+ "learning_rate": 4.616313771642481e-05,
213
+ "loss": 2.0153,
214
+ "step": 145
215
+ },
216
+ {
217
+ "epoch": 4.55,
218
+ "learning_rate": 7.836279790554748e-05,
219
+ "loss": 2.2206,
220
+ "step": 150
221
+ },
222
+ {
223
+ "epoch": 4.7,
224
+ "learning_rate": 0.00010839190399658423,
225
+ "loss": 2.1647,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 4.85,
230
+ "learning_rate": 0.00012957411177772773,
231
+ "loss": 2.1136,
232
+ "step": 160
233
+ },
234
+ {
235
+ "epoch": 5.0,
236
+ "learning_rate": 0.0001372,
237
+ "loss": 2.4947,
238
+ "step": 165
239
+ },
240
+ {
241
+ "epoch": 5.0,
242
+ "eval_loss": 3.3310177326202393,
243
+ "eval_runtime": 0.4984,
244
+ "eval_samples_per_second": 74.234,
245
+ "eval_steps_per_second": 10.032,
246
+ "step": 165
247
  }
248
  ],
249
+ "max_steps": 165,
250
+ "num_train_epochs": 5,
251
+ "total_flos": 169186590720000.0,
252
  "trial_name": null,
253
  "trial_params": null
254
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:820c88075277a4ce2d8bfc6da26d7fd133ba368b384db802a6a4afafc3e34877
3
- size 2863
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb348a771651919e39ddf44cbd2248983e52d34f3c89662cce1a833faf738e5d
3
+ size 3055