AlekseyKorshuk committed on
Commit
6a39d0b
1 Parent(s): 57fac9f

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/enigma")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2zrl31ns/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Enigma's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2706pjfi) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2706pjfi/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/enigma")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/8bx90lw6/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Enigma's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1c1t20ji) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1c1t20ji/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
1
  {
2
+ "_name_or_path": "huggingartists/enigma",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 3.9204065799713135, "eval_runtime": 2.4219, "eval_samples_per_second": 19.819, "eval_steps_per_second": 2.477, "epoch": 1.0}
1
+ {"eval_loss": 3.7314870357513428, "eval_runtime": 2.1159, "eval_samples_per_second": 21.267, "eval_steps_per_second": 2.836, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27ae1e4faca11fb0373678ac6b341924ba8c2d751bb9995dd28b4f2da30ab69a
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34494afe6195414f25402b1092b21a6c5d3bc09e12b82a51595fc053106f4cbe
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:983bd35bd9d10f8cc6cf739042bc36fd890ca9f5f3d884a1eac45dcdfa28e7f3
3
  size 995603825
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ffe69c515dd6b1670059d6078d23db802e476e919c1ebe2fa2e1574f8da98a
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b449540dc207e6c17fe2e6f96fe7ca39dd301ba633a439eefffe60d5c2198c8d
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff70e5e7b675aa5d6178e40f4c978b332c317fa63f44e779e416cdbd5fc2a17
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4865cef34c88ebff2de8694d032cb0c22961cc9866bf669c5b15cf1c9e378740
3
- size 14567
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b94157e2bb9ab763b1509d4627cd0a65ce3c8a5bd3742b19eeec8bba1ef0aef
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8989e74728331765fdc15dc13be950f4c53d6c5551f5ee94e2d269e65894c977
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2222362d80f6c4a5547283104536d5c4564191fb641a9b7f7b9e346e9a58c2ee
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/enigma", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 3.9204065799713135,
3
- "best_model_checkpoint": "output/enigma/checkpoint-33",
4
- "epoch": 1.0,
5
- "global_step": 33,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -50,11 +50,61 @@
50
  "eval_samples_per_second": 20.139,
51
  "eval_steps_per_second": 2.517,
52
  "step": 33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
  ],
55
- "max_steps": 33,
56
- "num_train_epochs": 1,
57
- "total_flos": 33837318144000.0,
58
  "trial_name": null,
59
  "trial_params": null
60
  }
1
  {
2
+ "best_metric": 3.7314870357513428,
3
+ "best_model_checkpoint": "output/enigma/checkpoint-66",
4
+ "epoch": 2.0,
5
+ "global_step": 66,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
50
  "eval_samples_per_second": 20.139,
51
  "eval_steps_per_second": 2.517,
52
  "step": 33
53
+ },
54
+ {
55
+ "epoch": 1.06,
56
+ "learning_rate": 1.2396913677783124e-06,
57
+ "loss": 3.4099,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 1.21,
62
+ "learning_rate": 1.4676757700644785e-05,
63
+ "loss": 3.9212,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 1.36,
68
+ "learning_rate": 4.0102530108070535e-05,
69
+ "loss": 3.831,
70
+ "step": 45
71
+ },
72
+ {
73
+ "epoch": 1.52,
74
+ "learning_rate": 7.186411942550872e-05,
75
+ "loss": 3.4868,
76
+ "step": 50
77
+ },
78
+ {
79
+ "epoch": 1.67,
80
+ "learning_rate": 0.00010290000000000001,
81
+ "loss": 3.5598,
82
+ "step": 55
83
+ },
84
+ {
85
+ "epoch": 1.82,
86
+ "learning_rate": 0.000126309992352219,
87
+ "loss": 3.6865,
88
+ "step": 60
89
+ },
90
+ {
91
+ "epoch": 1.97,
92
+ "learning_rate": 0.0001368893738885136,
93
+ "loss": 3.4504,
94
+ "step": 65
95
+ },
96
+ {
97
+ "epoch": 2.0,
98
+ "eval_loss": 3.7314870357513428,
99
+ "eval_runtime": 1.9498,
100
+ "eval_samples_per_second": 23.079,
101
+ "eval_steps_per_second": 3.077,
102
+ "step": 66
103
  }
104
  ],
105
+ "max_steps": 66,
106
+ "num_train_epochs": 2,
107
+ "total_flos": 68066574336000.0,
108
  "trial_name": null,
109
  "trial_params": null
110
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5fc12339fb114391064e4dc6241e4a8a870fc0dae8a125fb4c0d8c60fd6d5a1
3
  size 2671
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a6f855dcc08fc6611cb7b927ae059eb524cc6ac63995b001934043b86f8210
3
  size 2671