AlekseyKorshuk committed
Commit 0a935f8
1 Parent(s): ce06b5e

huggingartists
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/bob-dylan")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/31a7e0lm/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3mj0lvel/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Bob Dylan's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1h7wqver) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2rt8ywgd) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1h7wqver/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2rt8ywgd/artifacts) is logged and versioned.
 
 ## How to use
 
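The diff is truncated just before the README's "How to use" section. For context, a minimal inference sketch, assuming the standard `transformers` text-generation pipeline; the prompt and sampling settings below are illustrative and not taken from the model card:

```python
from transformers import pipeline

# Load the fine-tuned checkpoint from the Hub (pulls the updated pytorch_model.bin).
generator = pipeline("text-generation", model="huggingartists/bob-dylan")

# Illustrative prompt; adjust max_length and sampling to taste.
outputs = generator("I walked down the road", max_length=50, num_return_sequences=3, do_sample=True)
for out in outputs:
    print(out["generated_text"])
```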
config.json CHANGED
@@ -36,7 +36,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.19.2",
+  "transformers_version": "4.21.0",
   "use_cache": true,
   "vocab_size": 50257
 }
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 1.1156859397888184, "eval_runtime": 5.2897, "eval_samples_per_second": 82.046, "eval_steps_per_second": 10.398, "epoch": 11.0}
+{"eval_loss": 1.0779144763946533, "eval_runtime": 9.1991, "eval_samples_per_second": 44.787, "eval_steps_per_second": 5.653, "epoch": 12.0}
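For a rough sense of the improvement, the language-modeling eval_loss can be read as perplexity via exp(loss); a quick check using the values from the two versions of evaluation.txt above:

```python
import math

# eval_loss before and after this commit, copied from evaluation.txt
old_loss, new_loss = 1.1156859397888184, 1.0779144763946533

print(f"perplexity before: {math.exp(old_loss):.2f}")  # ~3.05
print(f"perplexity after:  {math.exp(new_loss):.2f}")  # ~2.94
```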
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52eb735612f0abe86a36c99bbc88e4b736d213924b487ddc439a7fda4f3738ba
+oid sha256:4421ceea92622de8494b95903bff7cda6d82883839516b7c317c2f2311032257
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20df6e4328ecf349bf08cc74a4faa3ceabf0373ff1ce5c11ee4657c56c5ebe05
+oid sha256:8c3bd6228ea2eec8b01df04b245f6638ed0f75a4497fd243362287540e38e841
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cbda632eba71ca8a5a6d2b2a32f60a0e0d89f0b2b5f27757234f2f9dea5b2bc
+oid sha256:bc253aac8cd1a12067cfc9355ddf901c50181d59d9e017d1e6d7648a5fcb6d39
 size 510396521
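The weight files above are Git LFS pointers, so only the sha256 oid (and sometimes the size) changes in the diff. A small sketch for verifying a locally downloaded pytorch_model.bin against the new oid; the local path is hypothetical:

```python
import hashlib
from pathlib import Path

EXPECTED_OID = "bc253aac8cd1a12067cfc9355ddf901c50181d59d9e017d1e6d7648a5fcb6d39"
path = Path("pytorch_model.bin")  # hypothetical local download path

# Stream the file in chunks so the ~510 MB checkpoint is never fully loaded into memory.
digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print(digest.hexdigest() == EXPECTED_OID)  # expect True
print(path.stat().st_size)                 # expect 510396521
```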
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7446d5500cdd6761e0d9b127f879a785bc53369d1cd3923b64bfed4fdcf6b5a3
+oid sha256:6314f0cb6701a11f2f7f767a9faf928846c32fdfc712debd57e46930cf18eb3c
 size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07ca2fdd8c3e336181f82585738bd2cd39530e31bea6189b6d35d926f6c48442
+oid sha256:fa5febbb8fd869636181bd682c3f67a65a34b578ea9188e4f8a7d80e9273a343
 size 623
special_tokens_map.json CHANGED
@@ -1 +1,5 @@
-{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
tokenizer_config.json CHANGED
@@ -1 +1,10 @@
-{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/bob-dylan", "tokenizer_class": "GPT2Tokenizer"}
+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "huggingartists/bob-dylan",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
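The two tokenizer JSON files are only pretty-printed in this commit (same keys and values, one key per line). A quick way to confirm nothing functional changed, assuming the standard AutoTokenizer API:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("huggingartists/bob-dylan")

# All three special tokens map to GPT-2's single <|endoftext|> token.
print(tok.bos_token, tok.eos_token, tok.unk_token)  # <|endoftext|> three times
print(tok.model_max_length)                         # 1024
```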
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.1156859397888184,
-  "best_model_checkpoint": "output/bob-dylan/checkpoint-3520",
+  "best_metric": 1.0779144763946533,
+  "best_model_checkpoint": "output/bob-dylan/checkpoint-3542",
   "epoch": 11.0,
-  "global_step": 3520,
+  "global_step": 3542,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4326,11 +4326,43 @@
       "eval_samples_per_second": 82.33,
       "eval_steps_per_second": 10.433,
       "step": 3520
+    },
+    {
+      "epoch": 10.95,
+      "learning_rate": 9.414215321223168e-07,
+      "loss": 1.3031,
+      "step": 3525
+    },
+    {
+      "epoch": 10.96,
+      "learning_rate": 4.696220449804098e-07,
+      "loss": 1.0973,
+      "step": 3530
+    },
+    {
+      "epoch": 10.98,
+      "learning_rate": 1.5992243352901425e-07,
+      "loss": 1.1205,
+      "step": 3535
+    },
+    {
+      "epoch": 10.99,
+      "learning_rate": 1.3059553632214649e-08,
+      "loss": 1.5828,
+      "step": 3540
+    },
+    {
+      "epoch": 11.0,
+      "eval_loss": 1.0779144763946533,
+      "eval_runtime": 8.8469,
+      "eval_samples_per_second": 46.57,
+      "eval_steps_per_second": 5.878,
+      "step": 3542
     }
   ],
-  "max_steps": 3520,
-  "num_train_epochs": 11,
-  "total_flos": 3668148191232000.0,
+  "max_steps": 3864,
+  "num_train_epochs": 12,
+  "total_flos": 3691011244032000.0,
   "trial_name": null,
   "trial_params": null
 }
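The trainer state now records the run extended to global_step 3542, with the best checkpoint at checkpoint-3542 and max_steps raised to 3864 over 12 epochs. A small sketch for pulling the headline fields out of a locally downloaded copy of trainer_state.json; the path is hypothetical:

```python
import json
from pathlib import Path

state = json.loads(Path("trainer_state.json").read_text())  # hypothetical local path

print(state["best_metric"])            # 1.0779144763946533
print(state["best_model_checkpoint"])  # output/bob-dylan/checkpoint-3542
print(state["global_step"], state["max_steps"], state["num_train_epochs"])  # 3542 3864 12

# The last log_history entry holds the final evaluation logged in this commit.
print(state["log_history"][-1])
```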
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8998c8154106cd43a7d424edf953518beb4d146ebea8364f94c30b8bca6902f7
-size 3247
+oid sha256:647f2b42971df5194cbc30178f0c27f133e73cc31198b4a11422e02c744f8538
+size 3375