AlekseyKorshuk committed
Commit: 6403403
Parent: 5461006

huggingartists
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/lil-nas-x")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/i3cxi0c3/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/13ndcpsd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lil Nas X's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1qrt007n) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3p5781qk) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1qrt007n/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3p5781qk/artifacts) is logged and versioned.
 
 ## How to use
 
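This hunk only re-links the W&B run ids in the model card; the surrounding usage text is untouched. As a minimal, hedged sketch of how the `load_dataset` call in the hunk pairs with the fine-tuned checkpoint (the `pipeline` call and the prompt string are illustrative, not quoted from the card):

```python
from datasets import load_dataset
from transformers import pipeline

# Lyrics dataset referenced in the hunk above
dataset = load_dataset("huggingartists/lil-nas-x")

# Text generation with the fine-tuned GPT-2 checkpoint from this repo
generator = pipeline("text-generation", model="huggingartists/lil-nas-x")
print(generator("I am", num_return_sequences=1)[0]["generated_text"])
```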
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
+  "_name_or_path": "huggingartists/lil-nas-x",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 2.9424936771392822, "eval_runtime": 1.2434, "eval_samples_per_second": 20.106, "eval_steps_per_second": 3.217, "epoch": 10.0}
+{"eval_loss": 2.6141936779022217, "eval_runtime": 1.3974, "eval_samples_per_second": 20.037, "eval_steps_per_second": 2.862, "epoch": 106.0}
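evaluation.txt is a single JSON object, so the updated numbers are easy to read back programmatically. A small sketch, assuming a local copy of the file; the last line is just `exp(eval_loss)` for intuition:

```python
import json
import math

# Read the evaluation summary updated by this commit
with open("evaluation.txt") as f:
    metrics = json.load(f)

print(metrics["eval_loss"])            # 2.6141936779022217 (was 2.9424936771392822)
print(math.exp(metrics["eval_loss"]))  # ~13.7, the eval loss expressed as perplexity
```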
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20f0207e6068480b12b0f0a1e986ad0b838ed6813a6cb3025608abce843f64a6
+oid sha256:e012de5ccfc4b4c18d70aee94f8defbfc1e50f57718ed71910a48c0e2e996780
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cca5db0a9b829f45ea81c1628183f47350c94e9b0ca90c2c7284c95ad41aeee0
+oid sha256:31152ec3e7ee51f760ec20935ab2868246b309095fe64db579f8d857dc60e25f
 size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97c65e91bdea4a97855ae617ec185512034c6de6b74b727c5d47b4834600048e
+oid sha256:90a5d88426cc944b9b9757165686316e8e8367bafb4aa8f561c8b87fddde8c4a
 size 510403817
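The weight files are stored through Git LFS, so the diff only shows the pointer's sha256 and byte size changing. A hedged sketch of checking a locally downloaded `pytorch_model.bin` against the new pointer (the local path is an assumption):

```python
import hashlib
import os

# Values taken from the updated LFS pointer in this commit
EXPECTED_OID = "90a5d88426cc944b9b9757165686316e8e8367bafb4aa8f561c8b87fddde8c4a"
EXPECTED_SIZE = 510403817

path = "pytorch_model.bin"  # assumed local download location

# Hash in chunks so the ~510 MB checkpoint is not read into memory at once
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch with LFS pointer"
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch with LFS pointer"
```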
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dff0838f6c4266c52ad3f733c18288237e78c922be8b97cbee6dca8f0e06b0d4
+oid sha256:a4752c512c2a88beb880fae152e08c8dd448d4cc3f5a72c4d66926f865266b7a
 size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:858b0150f63e516bfdd54df0bdcc3e1de87c6a580f8d9790a43f52a335f62cca
+oid sha256:c1491e00f77c720090b01b79de27682170c480dbbeee4b54bb59d93e9dda8447
 size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
+{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/lil-nas-x", "tokenizer_class": "GPT2Tokenizer"}
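As in config.json above, only the recorded `name_or_path` changes here; the tokenizer is still stock GPT-2 BPE with `<|endoftext|>` for every special token. A quick sketch of loading both files from the repo and confirming those fields (standard `transformers` auto classes, shown for illustration):

```python
from transformers import AutoConfig, AutoTokenizer

# Both the config and the tokenizer now identify the fine-tuned repo rather than base "gpt2"
config = AutoConfig.from_pretrained("huggingartists/lil-nas-x")
tokenizer = AutoTokenizer.from_pretrained("huggingartists/lil-nas-x")

print(config.architectures)                                           # ["GPT2LMHeadModel"], unchanged
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token)  # all "<|endoftext|>"
print(tokenizer.model_max_length)                                     # 1024
```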
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 2.9424936771392822,
-  "best_model_checkpoint": "output/lil-nas-x/checkpoint-114",
-  "epoch": 6.0,
-  "global_step": 114,
+  "best_metric": 2.6141936779022217,
+  "best_model_checkpoint": "output/lil-nas-x/checkpoint-133",
+  "epoch": 7.0,
+  "global_step": 133,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -186,11 +186,43 @@
       "eval_samples_per_second": 22.081,
       "eval_steps_per_second": 3.533,
       "step": 114
+    },
+    {
+      "epoch": 6.05,
+      "learning_rate": 0.00013626438541342677,
+      "loss": 1.9341,
+      "step": 115
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 0.00010612064364719856,
+      "loss": 2.1993,
+      "step": 120
+    },
+    {
+      "epoch": 6.58,
+      "learning_rate": 5.1759695582141247e-05,
+      "loss": 1.9984,
+      "step": 125
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 8.268100667234864e-06,
+      "loss": 2.0998,
+      "step": 130
+    },
+    {
+      "epoch": 7.0,
+      "eval_loss": 2.6141936779022217,
+      "eval_runtime": 1.2675,
+      "eval_samples_per_second": 22.091,
+      "eval_steps_per_second": 3.156,
+      "step": 133
     }
   ],
-  "max_steps": 190,
-  "num_train_epochs": 10,
-  "total_flos": 116013662208000.0,
+  "max_steps": 2014,
+  "num_train_epochs": 106,
+  "total_flos": 134957334528000.0,
   "trial_name": null,
   "trial_params": null
 }
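trainer_state.json is the HF Trainer's running state: this commit appends the step 115-133 log entries and moves `best_metric`/`best_model_checkpoint` to the step-133 evaluation. A small sketch of pulling the logged losses back out of `log_history`, assuming a local copy of the file:

```python
import json

# Load the Trainer state updated by this commit
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"], state["best_model_checkpoint"])
# 2.6141936779022217 output/lil-nas-x/checkpoint-133

# Training entries carry a "loss" key; evaluation entries carry "eval_loss" instead
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
print([(e["step"], e["loss"]) for e in train_logs[-3:]])
print([(e["step"], e["eval_loss"]) for e in eval_logs[-1:]])
```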
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:567145a7fe28355135b0476020151a6d4f254152553e83298e8d9a53a3d93243
+oid sha256:fe08a4b46827a3aa1d0e4730d532a662db372dd2e4a6888c5273969cfba12e5a
 size 2671
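training_args.bin is another LFS-tracked binary: a torch-pickled `TrainingArguments` object saved by the Trainer, so only its pointer changes in the diff. A hedged sketch of inspecting a local copy (requires both `torch` and `transformers` installed to unpickle; the attribute names printed are standard `TrainingArguments` fields):

```python
import torch

# training_args.bin is a pickled Python object, not a tensor dict, so recent PyTorch
# needs weights_only=False to load it
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate, args.per_device_train_batch_size)
```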