AlekseyKorshuk committed on
Commit
e8ae835
1 Parent(s): 659144b

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/bring-me-the-horizon")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/10m71qwf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Bring Me The Horizon's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2h4xcfis) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2h4xcfis/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/bring-me-the-horizon")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1yuqm1pv/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Bring Me The Horizon's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/z98a5j5r) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/z98a5j5r/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
1
  {
2
+ "_name_or_path": "huggingartists/bring-me-the-horizon",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.672280788421631, "eval_runtime": 0.5431, "eval_samples_per_second": 75.493, "eval_steps_per_second": 11.048, "epoch": 50.0}
1
+ {"eval_loss": 1.761746883392334, "eval_runtime": 0.4813, "eval_samples_per_second": 74.8, "eval_steps_per_second": 10.389, "epoch": 52.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db537ffe72a3ff6f07e80705e1c0c610eaa3eab6eba821179ebe2b0a97b96e66
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f60e50a7e388c49df33a2cac5abe57958ed061c6525713fbbaf0e72692b8751
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a195d9dcb9ddd9fe9fc32e35077eae701ed277e42bfa38724413ed25719ce4aa
3
  size 995603825
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aec99ac123784c1df18282b656728c10ab998edaa76607406f49892e8c5d190
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8c7fa95bd475ba5b53141d6b6a3d95ee5f4e1f608d084240003e51a964c7b30
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a416d9b660bc7b7cd01571f1569a51e5c89dfe326c7ea762cb31673f6bdd8a
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c9bdadfeacd6791e33b8118f0afae9a6b95a94da0f6e2c990e9113276a5c353
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a9eb709ab35df7f70905c14ca74e38db768088a1b8d7f8fd0df5af8ff6128c9
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d34af555ef0ec293744ffd94e8e92af652827e1907b71bf5e93415c53aafec6f
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bbb036f11beb59109336ec9fefde1522b1dc6499163e8831bd87d849eba417d
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/bring-me-the-horizon", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.672280788421631,
3
- "best_model_checkpoint": "output/bring-me-the-horizon/checkpoint-52",
4
- "epoch": 2.0,
5
- "global_step": 52,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -82,11 +82,245 @@
82
  "eval_samples_per_second": 74.866,
83
  "eval_steps_per_second": 10.956,
84
  "step": 52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
- "max_steps": 1300,
88
- "num_train_epochs": 50,
89
- "total_flos": 52780990464000.0,
90
  "trial_name": null,
91
  "trial_params": null
92
  }
1
  {
2
+ "best_metric": 1.761746883392334,
3
+ "best_model_checkpoint": "output/bring-me-the-horizon/checkpoint-208",
4
+ "epoch": 8.0,
5
+ "global_step": 208,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
82
  "eval_samples_per_second": 74.866,
83
  "eval_steps_per_second": 10.956,
84
  "step": 52
85
+ },
86
+ {
87
+ "epoch": 2.12,
88
+ "learning_rate": 0.00013274211424821946,
89
+ "loss": 2.2735,
90
+ "step": 55
91
+ },
92
+ {
93
+ "epoch": 2.31,
94
+ "learning_rate": 0.00010756924162575734,
95
+ "loss": 2.1555,
96
+ "step": 60
97
+ },
98
+ {
99
+ "epoch": 2.5,
100
+ "learning_rate": 6.860000000000001e-05,
101
+ "loss": 2.1802,
102
+ "step": 65
103
+ },
104
+ {
105
+ "epoch": 2.69,
106
+ "learning_rate": 2.9630758374242683e-05,
107
+ "loss": 1.978,
108
+ "step": 70
109
+ },
110
+ {
111
+ "epoch": 2.88,
112
+ "learning_rate": 4.457885751780558e-06,
113
+ "loss": 2.4223,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 3.0,
118
+ "eval_loss": 1.926320195198059,
119
+ "eval_runtime": 0.4807,
120
+ "eval_samples_per_second": 74.883,
121
+ "eval_steps_per_second": 10.4,
122
+ "step": 78
123
+ },
124
+ {
125
+ "epoch": 3.08,
126
+ "learning_rate": 1.9933913245728396e-06,
127
+ "loss": 2.1228,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 3.27,
132
+ "learning_rate": 2.310978564468141e-05,
133
+ "loss": 1.9814,
134
+ "step": 85
135
+ },
136
+ {
137
+ "epoch": 3.46,
138
+ "learning_rate": 6.033118373448485e-05,
139
+ "loss": 2.1607,
140
+ "step": 90
141
+ },
142
+ {
143
+ "epoch": 3.65,
144
+ "learning_rate": 0.00010048000960220248,
145
+ "loss": 2.1746,
146
+ "step": 95
147
+ },
148
+ {
149
+ "epoch": 3.85,
150
+ "learning_rate": 0.00012934228335981018,
151
+ "loss": 1.8565,
152
+ "step": 100
153
+ },
154
+ {
155
+ "epoch": 4.0,
156
+ "eval_loss": 1.8828926086425781,
157
+ "eval_runtime": 0.4815,
158
+ "eval_samples_per_second": 74.766,
159
+ "eval_steps_per_second": 10.384,
160
+ "step": 104
161
+ },
162
+ {
163
+ "epoch": 4.04,
164
+ "learning_rate": 0.0001366998287631265,
165
+ "loss": 2.1214,
166
+ "step": 105
167
+ },
168
+ {
169
+ "epoch": 4.23,
170
+ "learning_rate": 0.00011994783732453755,
171
+ "loss": 1.898,
172
+ "step": 110
173
+ },
174
+ {
175
+ "epoch": 4.42,
176
+ "learning_rate": 8.501705457012643e-05,
177
+ "loss": 1.8994,
178
+ "step": 115
179
+ },
180
+ {
181
+ "epoch": 4.62,
182
+ "learning_rate": 4.42741047488822e-05,
183
+ "loss": 1.9389,
184
+ "step": 120
185
+ },
186
+ {
187
+ "epoch": 4.81,
188
+ "learning_rate": 1.2143306799695228e-05,
189
+ "loss": 1.7737,
190
+ "step": 125
191
+ },
192
+ {
193
+ "epoch": 5.0,
194
+ "learning_rate": 0.0,
195
+ "loss": 1.7275,
196
+ "step": 130
197
+ },
198
+ {
199
+ "epoch": 5.0,
200
+ "eval_loss": 1.8320238590240479,
201
+ "eval_runtime": 0.4801,
202
+ "eval_samples_per_second": 74.982,
203
+ "eval_steps_per_second": 10.414,
204
+ "step": 130
205
+ },
206
+ {
207
+ "epoch": 5.19,
208
+ "learning_rate": 1.2143306799695106e-05,
209
+ "loss": 1.6759,
210
+ "step": 135
211
+ },
212
+ {
213
+ "epoch": 5.38,
214
+ "learning_rate": 4.4274104748882125e-05,
215
+ "loss": 1.934,
216
+ "step": 140
217
+ },
218
+ {
219
+ "epoch": 5.58,
220
+ "learning_rate": 8.501705457012647e-05,
221
+ "loss": 1.7796,
222
+ "step": 145
223
+ },
224
+ {
225
+ "epoch": 5.77,
226
+ "learning_rate": 0.00011994783732453749,
227
+ "loss": 1.5568,
228
+ "step": 150
229
+ },
230
+ {
231
+ "epoch": 5.96,
232
+ "learning_rate": 0.00013669982876312649,
233
+ "loss": 1.7681,
234
+ "step": 155
235
+ },
236
+ {
237
+ "epoch": 6.0,
238
+ "eval_loss": 1.8055299520492554,
239
+ "eval_runtime": 0.4812,
240
+ "eval_samples_per_second": 74.816,
241
+ "eval_steps_per_second": 10.391,
242
+ "step": 156
243
+ },
244
+ {
245
+ "epoch": 6.15,
246
+ "learning_rate": 0.00012934228335981018,
247
+ "loss": 1.5298,
248
+ "step": 160
249
+ },
250
+ {
251
+ "epoch": 6.35,
252
+ "learning_rate": 0.00010048000960220254,
253
+ "loss": 1.5527,
254
+ "step": 165
255
+ },
256
+ {
257
+ "epoch": 6.54,
258
+ "learning_rate": 6.033118373448493e-05,
259
+ "loss": 1.6001,
260
+ "step": 170
261
+ },
262
+ {
263
+ "epoch": 6.73,
264
+ "learning_rate": 2.3109785644681573e-05,
265
+ "loss": 1.5066,
266
+ "step": 175
267
+ },
268
+ {
269
+ "epoch": 6.92,
270
+ "learning_rate": 1.9933913245728244e-06,
271
+ "loss": 1.5358,
272
+ "step": 180
273
+ },
274
+ {
275
+ "epoch": 7.0,
276
+ "eval_loss": 1.7874510288238525,
277
+ "eval_runtime": 0.4807,
278
+ "eval_samples_per_second": 74.894,
279
+ "eval_steps_per_second": 10.402,
280
+ "step": 182
281
+ },
282
+ {
283
+ "epoch": 7.12,
284
+ "learning_rate": 4.457885751780527e-06,
285
+ "loss": 1.45,
286
+ "step": 185
287
+ },
288
+ {
289
+ "epoch": 7.31,
290
+ "learning_rate": 2.963075837424261e-05,
291
+ "loss": 1.4034,
292
+ "step": 190
293
+ },
294
+ {
295
+ "epoch": 7.5,
296
+ "learning_rate": 6.859999999999982e-05,
297
+ "loss": 1.3896,
298
+ "step": 195
299
+ },
300
+ {
301
+ "epoch": 7.69,
302
+ "learning_rate": 0.00010756924162575728,
303
+ "loss": 1.3993,
304
+ "step": 200
305
+ },
306
+ {
307
+ "epoch": 7.88,
308
+ "learning_rate": 0.00013274211424821943,
309
+ "loss": 1.4986,
310
+ "step": 205
311
+ },
312
+ {
313
+ "epoch": 8.0,
314
+ "eval_loss": 1.761746883392334,
315
+ "eval_runtime": 0.4842,
316
+ "eval_samples_per_second": 74.353,
317
+ "eval_steps_per_second": 10.327,
318
+ "step": 208
319
  }
320
  ],
321
+ "max_steps": 1352,
322
+ "num_train_epochs": 52,
323
+ "total_flos": 215043342336000.0,
324
  "trial_name": null,
325
  "trial_params": null
326
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:113255f50e87af9ff4033495785f9a42c4e9419fab63846e2aa6036bdea1f7dc
3
  size 2863
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a61175f6b4282876536474ec422652e78dcd99a973713a9795eb16ce4347ccb
3
  size 2863