AlekseyKorshuk committed on
Commit
dcd907c
1 Parent(s): 48d55f1

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/eminem")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/m1jtunaa/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2xezqr9j) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2xezqr9j/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/eminem")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3ewsu9zf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/228hhsot) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/228hhsot/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -18,7 +18,9 @@
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
 
21
  "resid_pdrop": 0.1,
 
22
  "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.11.3",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
21
+ "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
  "scale_attn_weights": true,
25
  "summary_activation": null,
26
  "summary_first_dropout": 0.1,
 
37
  }
38
  },
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.12.0",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 0.6717901825904846, "eval_runtime": 32.3581, "eval_samples_per_second": 21.077, "eval_steps_per_second": 2.658, "epoch": 3.0}
 
1
+ {"eval_loss": 0.536793053150177, "eval_runtime": 27.8473, "eval_samples_per_second": 22.444, "eval_steps_per_second": 2.837, "epoch": 4.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e4996cc5b01b2f5df2097ea186a2e86f7ee3597d536c4f655a2fc46fe7ba2cc
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4f87ab1eff90581e85a0e1a8b66c8bbdc05aa0f140d00508d68ab79ec62ebb
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71a25a4b42966c35fad23512f0a00af907488f398f69579bbf6993f208f7875f
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2736ac8126eeb3acab31bdda96c21bb571e1ee17ea73a22dac8507b663c7b97f
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9621b75ec1fc38e7c917e2fcc638921268a3d38f2762a4ff89a10975fc0830c
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77194dc46668d0004843bde209b7980f1e28bafa5d4996a99526b9a6bf1206f3
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81db186b04d47e780c47c135689736ec200f7e269a91591d61577bc87d72dc42
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2403905a22d6fba3820922efb35175b8d0fee26b7667687784f25f62cc49001c
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0d0227464d22267541364a30983be6a0f32a9f218c9367e12431d626c7660eb
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9134467a4fb57c206ad7804754df5fb2c2150ce97742085a7f92d7d6b8fdc6f5
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6717901825904846,
3
- "best_model_checkpoint": "output/eminem/checkpoint-1350",
4
  "epoch": 3.0,
5
- "global_step": 1350,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1650,11 +1650,43 @@
1650
  "eval_samples_per_second": 21.1,
1651
  "eval_steps_per_second": 2.661,
1652
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1653
  }
1654
  ],
1655
- "max_steps": 1350,
1656
- "num_train_epochs": 3,
1657
- "total_flos": 1410062450688000.0,
1658
  "trial_name": null,
1659
  "trial_params": null
1660
  }
 
1
  {
2
+ "best_metric": 0.536793053150177,
3
+ "best_model_checkpoint": "output/eminem/checkpoint-1371",
4
  "epoch": 3.0,
5
+ "global_step": 1371,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1650
  "eval_samples_per_second": 21.1,
1651
  "eval_steps_per_second": 2.661,
1652
  "step": 1350
1653
+ },
1654
+ {
1655
+ "epoch": 2.96,
1656
+ "learning_rate": 0.0001141037890981026,
1657
+ "loss": 0.9662,
1658
+ "step": 1355
1659
+ },
1660
+ {
1661
+ "epoch": 2.98,
1662
+ "learning_rate": 0.00011584107440876515,
1663
+ "loss": 0.9781,
1664
+ "step": 1360
1665
+ },
1666
+ {
1667
+ "epoch": 2.99,
1668
+ "learning_rate": 0.00011752255327383569,
1669
+ "loss": 1.0047,
1670
+ "step": 1365
1671
+ },
1672
+ {
1673
+ "epoch": 3.0,
1674
+ "learning_rate": 0.00011914623934224141,
1675
+ "loss": 1.0306,
1676
+ "step": 1370
1677
+ },
1678
+ {
1679
+ "epoch": 3.0,
1680
+ "eval_loss": 0.536793053150177,
1681
+ "eval_runtime": 27.1721,
1682
+ "eval_samples_per_second": 23.002,
1683
+ "eval_steps_per_second": 2.907,
1684
+ "step": 1371
1685
  }
1686
  ],
1687
+ "max_steps": 1828,
1688
+ "num_train_epochs": 4,
1689
+ "total_flos": 1431619043328000.0,
1690
  "trial_name": null,
1691
  "trial_params": null
1692
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad2c17509308df0501daa22f7c38deda6ee50f01e56c9f0747a3c0dbf3f69a2e
3
  size 2863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19bbcaf668a94fe1eb5daa14962dff75a0d4631ce679cba94b96ee9567a54ad0
3
  size 2863