AlekseyKorshuk committed
Commit adabd3c
1 Parent(s): 191a8f4

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/linkin-park")
 ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2b15vg01/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3mtr0u4z/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Linkin Park's lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/fto51dp4) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/fxn4brd6) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/fto51dp4/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/fxn4brd6/artifacts) is logged and versioned.
 
 ## How to use
 
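The "How to use" section referenced in this hunk presumably loads the checkpoint through the standard `transformers` text-generation pipeline. A minimal, hypothetical sketch (the prompt string and generation parameters are illustrative, not taken from this commit):

```python
from transformers import pipeline

# Hypothetical usage sketch: load the fine-tuned checkpoint from the Hub and
# sample a few continuations; the prompt is illustrative only.
generator = pipeline("text-generation", model="huggingartists/linkin-park")
outputs = generator("I tried so hard", do_sample=True, max_length=50, num_return_sequences=3)
for out in outputs:
    print(out["generated_text"])
```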
config.json CHANGED
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
   }
  },
  "torch_dtype": "float32",
- "transformers_version": "4.10.2",
+ "transformers_version": "4.12.2",
  "use_cache": true,
  "vocab_size": 50257
 }
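The config.json hunk adds two GPT-2 attention options (`reorder_and_upcast_attn`, `scale_attn_by_inverse_layer_idx`) that newer `transformers` releases serialize, and bumps the recorded `transformers_version` to 4.12.2. A small sketch of inspecting these fields after the update (expected values are the ones shown in the diff):

```python
from transformers import AutoConfig

# Sketch: load the updated config from the Hub and read the fields touched
# by this commit; expected values are taken from the hunk above.
config = AutoConfig.from_pretrained("huggingartists/linkin-park")
print(config.reorder_and_upcast_attn)          # False
print(config.scale_attn_by_inverse_layer_idx)  # False
print(config.transformers_version)             # "4.12.2"
```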
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 2.0318267345428467, "eval_runtime": 4.5697, "eval_samples_per_second": 21.008, "eval_steps_per_second": 2.626, "epoch": 3.0}
+ {"eval_loss": 1.7180215120315552, "eval_runtime": 4.7844, "eval_samples_per_second": 20.901, "eval_steps_per_second": 2.717, "epoch": 4.0}
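Training for a fourth epoch lowers `eval_loss` from about 2.032 to about 1.718. Assuming `eval_loss` is the mean token-level cross-entropy reported by the Trainer, the corresponding validation perplexity drops from roughly 7.6 to roughly 5.6:

```python
import math

# Sketch: perplexity is exp(mean cross-entropy), assuming the Trainer's usual
# causal-LM convention for eval_loss.
before, after = 2.0318267345428467, 1.7180215120315552
print(round(math.exp(before), 2))  # ~7.63
print(round(math.exp(after), 2))   # ~5.57
```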
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:960e645cfdb5661c8a5575898191f3f17c064dc742a6f5d1c1e623c5a21c709e
+ oid sha256:e788d77fe1b0e4b37310b71f47d838058cad60aa5aa7e9a144ead4b8432a2469
 size 497764120
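The weight files below are stored as Git LFS pointers (spec v1), so only the `oid sha256` and `size` lines change in the diff; the binary payload is addressed by that hash. A hypothetical check that a locally downloaded payload matches the new pointer:

```python
import hashlib

# Sketch: hash a locally resolved LFS payload and compare it with the new
# flax_model.msgpack oid recorded in this commit.
expected = "e788d77fe1b0e4b37310b71f47d838058cad60aa5aa7e9a144ead4b8432a2469"

h = hashlib.sha256()
with open("flax_model.msgpack", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == expected, "payload does not match the LFS pointer"
```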
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f8e18e8cd6f8f5633cbbd1851d80023da278856308536afdaf38cbfb154a8732
- size 995603825
+ oid sha256:9491f77c6075e808c2ec8cc10e543cb3dec8b4f1e719478904830c0bb572bb3d
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a568564c9d7a3f53afa0d2c83e7145788515ff74c6c201a2bf1f4cce94aecb72
+ oid sha256:db75cae56bb0856bb04b9fc712dfbdc2552084b04e1bca7596d8840a170bea72
 size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:623c33d8f94c376806d8d4f4fd33e1db6afbc9689cfdf6d4fc7c47fa87f23bdd
+ oid sha256:2ddf98a38c2c0ecfdce82ba131154c1ba8b98f75a392abba4114eefe1d934305
 size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:81c6de287d0aeee2c682cd2a01e5e01657f181e757ee594a86aa0945f540d0b1
+ oid sha256:007eb1dede0d4998d3ab2e2cd3723c770930c260860d14cb2ace888387dbf23d
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
- "best_metric": 2.0318267345428467,
- "best_model_checkpoint": "output/linkin-park/checkpoint-210",
- "epoch": 3.0,
- "global_step": 210,
+ "best_metric": 1.7180215120315552,
+ "best_model_checkpoint": "output/linkin-park/checkpoint-276",
+ "epoch": 4.0,
+ "global_step": 276,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -290,11 +290,97 @@
 "eval_samples_per_second": 21.081,
 "eval_steps_per_second": 2.635,
 "step": 210
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 4.500582770777967e-06,
+ "loss": 1.8704,
+ "step": 215
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 1.166986896886688e-05,
+ "loss": 1.6973,
+ "step": 220
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 2.177685437520025e-05,
+ "loss": 1.9541,
+ "step": 225
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 3.4300000000000014e-05,
+ "loss": 1.7097,
+ "step": 230
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 4.859308855339902e-05,
+ "loss": 1.8994,
+ "step": 235
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 6.391857044318346e-05,
+ "loss": 1.772,
+ "step": 240
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 7.948562268689874e-05,
+ "loss": 1.7301,
+ "step": 245
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 9.449095682862937e-05,
+ "loss": 2.1161,
+ "step": 250
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001081602700970798,
+ "loss": 1.876,
+ "step": 255
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00011978820084915117,
+ "loss": 1.9729,
+ "step": 260
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00012877472652481797,
+ "loss": 1.7309,
+ "step": 265
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013465612591205902,
+ "loss": 2.0053,
+ "step": 270
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013712890801216552,
+ "loss": 1.7362,
+ "step": 275
+ },
+ {
+ "epoch": 4.0,
+ "eval_loss": 1.7180215120315552,
+ "eval_runtime": 4.7996,
+ "eval_samples_per_second": 20.835,
+ "eval_steps_per_second": 2.709,
+ "step": 276
 }
 ],
- "max_steps": 210,
- "num_train_epochs": 3,
- "total_flos": 217264324608000.0,
+ "max_steps": 276,
+ "num_train_epochs": 4,
+ "total_flos": 286245421056000.0,
 "trial_name": null,
 "trial_params": null
 }
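The updated `trainer_state.json` now records checkpoint-276 (epoch 4, step 276) as the best checkpoint and appends the extra epoch's log entries to `log_history`. A minimal sketch of reading the headline fields back out, assuming the layout shown in the hunk above:

```python
import json

# Sketch: pull the summary fields from the updated trainer_state.json;
# expected values are the ones shown in the diff above.
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 1.7180215120315552
print(state["best_model_checkpoint"])  # output/linkin-park/checkpoint-276
print(state["global_step"])            # 276
print(state["num_train_epochs"])       # 4
```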
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:aada2de1d92f467f8f5669200ff659c45b17df03bf26159792e731f4a3fea9f2
- size 2671
+ oid sha256:a579222290c0d1711177447b62010d2079a583ef41b959a81fd08b749cf9458e
+ size 2863