AlekseyKorshuk committed
Commit 86ac4b4
1 Parent(s): beb58cb

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/coldplay")
 ```

- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2dznxm49/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/34iy5awi/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.

 ## Training procedure

 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Coldplay's lyrics.

- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3ll07k50) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/p1oxwu42) for full transparency and reproducibility.

- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3ll07k50/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/p1oxwu42/artifacts) is logged and versioned.

 ## How to use

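The hunk cuts off right after the "## How to use" heading. For context, a minimal sketch of how a huggingartists checkpoint like this is typically consumed; the `huggingartists/coldplay` id comes from the README hunk above, while the prompt string and the assumption of a `train` split are illustrative:

```python
from datasets import load_dataset
from transformers import pipeline

# Lyrics dataset referenced in the README hunk above (assumes a "train" split).
dataset = load_dataset("huggingartists/coldplay")
print(dataset["train"][0])

# Text generation with the fine-tuned GPT-2 checkpoint.
generator = pipeline("text-generation", model="huggingartists/coldplay")
print(generator("I look at the stars", max_length=50, do_sample=True))
```
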
 
config.json CHANGED
@@ -1,5 +1,5 @@
 {
- "_name_or_path": "gpt2",
+ "_name_or_path": "huggingartists/coldplay",
 "activation_function": "gelu_new",
 "architectures": [
 "GPT2LMHeadModel"
@@ -35,7 +35,7 @@
 }
 },
 "torch_dtype": "float32",
- "transformers_version": "4.10.2",
+ "transformers_version": "4.11.3",
 "use_cache": true,
 "vocab_size": 50257
 }
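Only `_name_or_path` and `transformers_version` change here; the architecture itself is untouched. A small sketch for inspecting the published config with the standard `AutoConfig` loader (values in comments are the ones visible in this file):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("huggingartists/coldplay")
print(config.architectures)  # ['GPT2LMHeadModel']
print(config.vocab_size)     # 50257
```
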
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 2.627840757369995, "eval_runtime": 3.4065, "eval_samples_per_second": 21.43, "eval_steps_per_second": 2.936, "epoch": 1.0}
+ {"eval_loss": 2.3194692134857178, "eval_runtime": 1.0619, "eval_samples_per_second": 76.28, "eval_steps_per_second": 10.359, "epoch": 61.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5ead9db893f292aadd4d15f62a1b535b49253de67d50eb246ca4ce2263fe819a
+ oid sha256:0b47c6d38b14cb8948342f6984a6c1085465ea6d66dc0b0e263b9efdcf12dd4d
 size 497764120
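The weight files in this commit are Git LFS pointers: a `version` line, an `oid sha256:` digest, and a `size` in bytes. A downloaded blob can be checked against its pointer with a sketch like this (the local filename is an assumption; it must be the resolved file, not the pointer itself):

```python
import hashlib

def sha256_of(path: str) -> str:
    """Stream a file and return its hex SHA-256, the value stored in the pointer's oid field."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("flax_model.msgpack") == "0b47c6d38b14cb8948342f6984a6c1085465ea6d66dc0b0e263b9efdcf12dd4d"
```
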
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:acfa87429140421dff57f9d4e3279f875fe9ae59d2be9ca282b5821837308051
- size 995603825
+ oid sha256:da8b48aba8a2ac3780274dff233d2f46b80384823383f633919cb3fff88e3a1a
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:62bbad0e4434383099ceb0e60e0c83e6f9d86385a92809741b438ac46c9e6dfa
+ oid sha256:d268187c04aaef8b37af7894fd8c042ea497315e5c23e7d7d5f5175e3c43fc91
 size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:206cdb80d3e3bcaa4a4db6bb72279bbb124927d5763dfcc626c79769ad185e35
+ oid sha256:a8173196c210c0874e6edb77639fe67968b8a0f875123e25b7002bd9aa763efc
 size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:71252f08280d2f37911a6a36ecee4f688037ea330ad139359869696f6b1221ab
+ oid sha256:ea923f2e0b275d809c555b520c2a11f0ea9d5d9892df660f643bda82951dfca7
 size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/coldplay", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
- "best_metric": 2.627840757369995,
- "best_model_checkpoint": "output/coldplay/checkpoint-48",
- "epoch": 1.0,
- "global_step": 48,
+ "best_metric": 2.3194692134857178,
+ "best_model_checkpoint": "output/coldplay/checkpoint-376",
+ "epoch": 8.0,
+ "global_step": 376,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -68,11 +68,463 @@
 "eval_samples_per_second": 21.405,
 "eval_steps_per_second": 2.932,
 "step": 48
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 1.3746270344901413e-06,
+ "loss": 2.7251,
+ "step": 50
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 9.576451662754438e-06,
+ "loss": 2.4207,
+ "step": 55
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 2.4309929383066146e-05,
+ "loss": 2.7058,
+ "step": 60
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 4.3944626783346644e-05,
+ "loss": 2.6908,
+ "step": 65
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 6.630773257727353e-05,
+ "loss": 2.7345,
+ "step": 70
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 8.892450484875447e-05,
+ "loss": 2.4105,
+ "step": 75
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00010929213048843373,
+ "loss": 2.5926,
+ "step": 80
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00012515669103944476,
+ "loss": 2.4909,
+ "step": 85
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00013476258540873022,
+ "loss": 2.5389,
+ "step": 90
+ },
+ {
+ "epoch": 2.0,
+ "eval_loss": 2.446333646774292,
+ "eval_runtime": 1.0636,
+ "eval_samples_per_second": 76.156,
+ "eval_steps_per_second": 10.342,
+ "step": 94
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00013704680787354832,
+ "loss": 2.3598,
+ "step": 95
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00013175658222600302,
+ "loss": 2.2124,
+ "step": 100
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00011947733444744994,
+ "loss": 2.1658,
+ "step": 105
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001015679084058065,
+ "loss": 2.1519,
+ "step": 110
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 8.001019372440279e-05,
+ "loss": 2.243,
+ "step": 115
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 5.718980627559731e-05,
+ "loss": 2.2952,
+ "step": 120
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 3.563209159419354e-05,
+ "loss": 2.2394,
+ "step": 125
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 1.772266555255008e-05,
+ "loss": 2.2106,
+ "step": 130
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 5.443417773996978e-06,
+ "loss": 2.2222,
+ "step": 135
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 1.5319212645169297e-07,
+ "loss": 2.1897,
+ "step": 140
+ },
+ {
+ "epoch": 3.0,
+ "eval_loss": 2.3795111179351807,
+ "eval_runtime": 1.0634,
+ "eval_samples_per_second": 76.169,
+ "eval_steps_per_second": 10.344,
+ "step": 141
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 2.4374145912697595e-06,
+ "loss": 2.0277,
+ "step": 145
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 1.204330896055522e-05,
+ "loss": 2.0337,
+ "step": 150
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 2.790786951156628e-05,
+ "loss": 2.0818,
+ "step": 155
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 4.8275495151245426e-05,
+ "loss": 2.1041,
+ "step": 160
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 7.089226742272638e-05,
+ "loss": 2.1708,
+ "step": 165
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 9.325537321665337e-05,
+ "loss": 2.0552,
+ "step": 170
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00011289007061693382,
+ "loss": 2.1205,
+ "step": 175
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00012762354833724553,
+ "loss": 1.9704,
+ "step": 180
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013582537296550986,
+ "loss": 1.9882,
+ "step": 185
+ },
+ {
+ "epoch": 4.0,
+ "eval_loss": 2.3495991230010986,
+ "eval_runtime": 1.0619,
+ "eval_samples_per_second": 76.281,
+ "eval_steps_per_second": 10.359,
+ "step": 188
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.0001365879156874179,
+ "loss": 2.1933,
+ "step": 190
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012982679213998792,
+ "loss": 1.8172,
+ "step": 195
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00011629020053848047,
+ "loss": 1.6238,
+ "step": 200
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 9.747612470258382e-05,
+ "loss": 1.8408,
+ "step": 205
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 7.546656444541333e-05,
+ "loss": 1.8867,
+ "step": 210
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 5.2697137691647635e-05,
+ "loss": 2.0636,
+ "step": 215
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 3.1687550572992616e-05,
+ "loss": 1.8887,
+ "step": 220
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 1.4762762169883855e-05,
+ "loss": 1.9152,
+ "step": 225
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 3.795700315696817e-06,
+ "loss": 1.8236,
+ "step": 230
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 0.0,
+ "loss": 1.6923,
+ "step": 235
+ },
+ {
+ "epoch": 5.0,
+ "eval_loss": 2.332808017730713,
+ "eval_runtime": 1.0629,
+ "eval_samples_per_second": 76.208,
+ "eval_steps_per_second": 10.349,
+ "step": 235
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 3.7957003156967485e-06,
+ "loss": 1.6147,
+ "step": 240
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 1.4762762169883802e-05,
+ "loss": 1.7048,
+ "step": 245
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 3.168755057299255e-05,
+ "loss": 1.5544,
+ "step": 250
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 5.269713769164743e-05,
+ "loss": 1.5129,
+ "step": 255
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 7.546656444541325e-05,
+ "loss": 1.7164,
+ "step": 260
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 9.747612470258363e-05,
+ "loss": 1.6792,
+ "step": 265
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 0.0001162902005384805,
+ "loss": 1.7173,
+ "step": 270
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 0.00012982679213998787,
+ "loss": 1.7795,
+ "step": 275
+ },
+ {
+ "epoch": 5.96,
+ "learning_rate": 0.00013658791568741792,
+ "loss": 1.9717,
+ "step": 280
+ },
+ {
+ "epoch": 6.0,
+ "eval_loss": 2.336082935333252,
+ "eval_runtime": 1.0719,
+ "eval_samples_per_second": 75.567,
+ "eval_steps_per_second": 10.262,
+ "step": 282
+ },
+ {
+ "epoch": 6.06,
+ "learning_rate": 0.00013582537296550986,
+ "loss": 1.6967,
+ "step": 285
+ },
+ {
+ "epoch": 6.17,
+ "learning_rate": 0.00012762354833724559,
+ "loss": 1.4993,
+ "step": 290
+ },
+ {
+ "epoch": 6.28,
+ "learning_rate": 0.0001128900706169339,
+ "loss": 1.3665,
+ "step": 295
+ },
+ {
+ "epoch": 6.38,
+ "learning_rate": 9.325537321665346e-05,
+ "loss": 1.5098,
+ "step": 300
+ },
+ {
+ "epoch": 6.49,
+ "learning_rate": 7.089226742272658e-05,
+ "loss": 1.6949,
+ "step": 305
+ },
+ {
+ "epoch": 6.6,
+ "learning_rate": 4.827549515124539e-05,
+ "loss": 1.5465,
+ "step": 310
+ },
+ {
+ "epoch": 6.7,
+ "learning_rate": 2.7907869511566348e-05,
+ "loss": 1.506,
+ "step": 315
+ },
+ {
+ "epoch": 6.81,
+ "learning_rate": 1.2043308960555334e-05,
+ "loss": 1.6257,
+ "step": 320
+ },
+ {
+ "epoch": 6.91,
+ "learning_rate": 2.437414591269752e-06,
+ "loss": 1.3859,
+ "step": 325
+ },
+ {
+ "epoch": 7.0,
+ "eval_loss": 2.334944009780884,
+ "eval_runtime": 1.0709,
+ "eval_samples_per_second": 75.638,
+ "eval_steps_per_second": 10.272,
+ "step": 329
+ },
+ {
+ "epoch": 7.02,
+ "learning_rate": 1.5319212645167772e-07,
+ "loss": 1.5912,
+ "step": 330
+ },
+ {
+ "epoch": 7.13,
+ "learning_rate": 5.443417773996994e-06,
+ "loss": 1.4726,
+ "step": 335
+ },
+ {
+ "epoch": 7.23,
+ "learning_rate": 1.772266555255011e-05,
+ "loss": 1.2431,
+ "step": 340
+ },
+ {
+ "epoch": 7.34,
+ "learning_rate": 3.563209159419346e-05,
+ "loss": 1.3668,
+ "step": 345
+ },
+ {
+ "epoch": 7.45,
+ "learning_rate": 5.718980627559723e-05,
+ "loss": 1.3476,
+ "step": 350
+ },
+ {
+ "epoch": 7.55,
+ "learning_rate": 8.001019372440265e-05,
+ "loss": 1.2561,
+ "step": 355
+ },
+ {
+ "epoch": 7.66,
+ "learning_rate": 0.00010156790840580641,
+ "loss": 1.6616,
+ "step": 360
+ },
+ {
+ "epoch": 7.77,
+ "learning_rate": 0.0001194773344474498,
+ "loss": 1.4607,
+ "step": 365
+ },
+ {
+ "epoch": 7.87,
+ "learning_rate": 0.00013175658222600294,
+ "loss": 1.3461,
+ "step": 370
+ },
+ {
+ "epoch": 7.98,
+ "learning_rate": 0.00013704680787354832,
+ "loss": 1.3842,
+ "step": 375
+ },
+ {
+ "epoch": 8.0,
+ "eval_loss": 2.3194692134857178,
+ "eval_runtime": 1.0661,
+ "eval_samples_per_second": 75.98,
+ "eval_steps_per_second": 10.318,
+ "step": 376
 }
 ],
- "max_steps": 48,
- "num_train_epochs": 1,
- "total_flos": 50037424128000.0,
+ "max_steps": 2867,
+ "num_train_epochs": 61,
+ "total_flos": 391938048000000.0,
 "trial_name": null,
 "trial_params": null
 }
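The enlarged `log_history` makes the per-epoch evaluation trend easy to pull out programmatically; a minimal sketch that assumes a local copy of this `trainer_state.json` and uses only the field names visible in the diff above:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation records; training records carry "loss"/"learning_rate" instead.
evals = [(entry["epoch"], entry["eval_loss"]) for entry in state["log_history"] if "eval_loss" in entry]
for epoch, loss in evals:
    print(f"epoch {epoch}: eval_loss {loss:.4f}")

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
```
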
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:048a39b3592a2f42dac700c0353f225e76c9fb9c51f735a610400f6387986da0
- size 2671
+ oid sha256:7042b00f38d89f1720e38aa30d3c0f04599b070773bf507b3a660f4d7684fea2
+ size 2863