AlekseyKorshuk committed on
Commit 59e1032
1 Parent(s): 16d8d65

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/morgenshtern")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2mqz06ef/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/26pogqcg/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1u21f1zw) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2f54nbz5) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1u21f1zw/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2f54nbz5/artifacts) is logged and versioned.
 
 ## How to use
 
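The "How to use" section is cut off by the hunk boundary above. As a minimal sketch of how a fine-tuned checkpoint like this is typically consumed (not necessarily the exact snippet in the README), the repo id `huggingartists/morgenshtern` can be passed to the `transformers` text-generation pipeline:

```python
from transformers import pipeline

# Load the fine-tuned GPT-2 checkpoint from the Hub;
# "huggingartists/morgenshtern" is the repository this commit belongs to.
generator = pipeline("text-generation", model="huggingartists/morgenshtern")

# The prompt below is an arbitrary example, not taken from the README.
outputs = generator("I am", do_sample=True, max_length=50, num_return_sequences=3)
for out in outputs:
    print(out["generated_text"])
```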
config.json CHANGED
@@ -35,7 +35,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.1",
+  "transformers_version": "4.10.2",
   "use_cache": true,
   "vocab_size": 50257
 }
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 1.029100775718689, "eval_runtime": 7.2168, "eval_samples_per_second": 20.508, "eval_steps_per_second": 2.633, "epoch": 21.0}
+{"eval_loss": 1.004758358001709, "eval_runtime": 6.3083, "eval_samples_per_second": 20.766, "eval_steps_per_second": 2.695, "epoch": 14.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22fb23883fc29f2e2299e376a3685d3899aa27180ebd009e6a276dd79c0f549a
+oid sha256:73f4525a915f9e923ec8c1948b7703789a43fc65ae0c0ce7a70b29cc32f99056
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cae6db0edbbd42bbc453b0149987a6ee3a6ba790c60e2925ab382f8c1f160ba
+oid sha256:e3189c4875db3d6c2430dd21cc7aaf58494cb3ea16a1644a9eba14559109a180
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0d244ecace2e78de24953f6135d7cc932ff6489b68a4c749677efd661e57029
+oid sha256:77256682836976672d4132cd08e64ed8578aca99003f5b1ad793033c0c217d56
 size 510403817
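The binary files in this commit are stored as Git LFS pointers, so the diff only shows the `oid sha256:` and `size` lines changing. A hedged sketch for checking that a locally downloaded file matches the hash in its pointer (the local path is illustrative):

```python
import hashlib
from pathlib import Path

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file through SHA-256 so large checkpoints fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected value taken from the new pytorch_model.bin pointer above.
expected = "77256682836976672d4132cd08e64ed8578aca99003f5b1ad793033c0c217d56"
local = Path("pytorch_model.bin")  # assumed local download location
if local.exists():
    print("match" if sha256_of(str(local)) == expected else "MISMATCH")
```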
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76d31626c3bac6215c579c04c793834783bc9d247d3009c55526f8f7392b8d8e
-size 14567
+oid sha256:c58bc32277fe141391df912d7cedc705240c20ad9546d00ca7addc7fbb5b47a9
+size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d04e24471eba096654369d2172acef6ba07c54938134ee47e6aca1b621eca74
+oid sha256:7618b7dada376a378c38d9aa39cd28c5eded00be56339f85d3ea9724b270283c
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.029100775718689,
-  "best_model_checkpoint": "output/morgenshtern/checkpoint-1274",
+  "best_metric": 1.004758358001709,
+  "best_model_checkpoint": "output/morgenshtern/checkpoint-1300",
   "epoch": 13.0,
-  "global_step": 1274,
+  "global_step": 1300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1674,11 +1674,55 @@
       "eval_samples_per_second": 21.242,
       "eval_steps_per_second": 2.727,
       "step": 1274
+    },
+    {
+      "epoch": 12.75,
+      "learning_rate": 2.0092474810602958e-05,
+      "loss": 1.2194,
+      "step": 1275
+    },
+    {
+      "epoch": 12.8,
+      "learning_rate": 1.3101434185878674e-05,
+      "loss": 1.0397,
+      "step": 1280
+    },
+    {
+      "epoch": 12.85,
+      "learning_rate": 7.476952440677985e-06,
+      "loss": 1.0537,
+      "step": 1285
+    },
+    {
+      "epoch": 12.9,
+      "learning_rate": 3.35752298215246e-06,
+      "loss": 1.2597,
+      "step": 1290
+    },
+    {
+      "epoch": 12.95,
+      "learning_rate": 8.445798351736176e-07,
+      "loss": 1.0949,
+      "step": 1295
+    },
+    {
+      "epoch": 13.0,
+      "learning_rate": 0.0,
+      "loss": 1.1523,
+      "step": 1300
+    },
+    {
+      "epoch": 13.0,
+      "eval_loss": 1.004758358001709,
+      "eval_runtime": 6.2971,
+      "eval_samples_per_second": 20.803,
+      "eval_steps_per_second": 2.7,
+      "step": 1300
     }
   ],
-  "max_steps": 2058,
-  "num_train_epochs": 21,
-  "total_flos": 1320047345664000.0,
+  "max_steps": 1400,
+  "num_train_epochs": 14,
+  "total_flos": 1346568486912000.0,
   "trial_name": null,
   "trial_params": null
 }
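A minimal sketch for inspecting this state file locally; the field names (`best_metric`, `best_model_checkpoint`, `log_history`) follow the `transformers.Trainer` state schema visible in the diff:

```python
import json

# trainer_state.json keeps the full training/eval log written by Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])            # 1.00475... after this commit
print("best checkpoint:", state["best_model_checkpoint"])

# Collect the evaluation entries (those carrying "eval_loss") to see how the
# loss evolved across epochs.
eval_points = [
    (entry["epoch"], entry["eval_loss"])
    for entry in state["log_history"]
    if "eval_loss" in entry
]
for epoch, loss in eval_points[-3:]:
    print(f"epoch {epoch}: eval_loss {loss:.4f}")
```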
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:188b9cc77247d315e833007ec5b6ee4bead6e01dfa5781ac98b51d69684404d5
+oid sha256:5e678611f04c51489e9a36568716af96aebefa9660b4f8c5449f0a99f9629d37
 size 2671
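`training_args.bin` is a pickled `TrainingArguments` object rather than plain JSON. A sketch for peeking at it, assuming a local download and a `transformers` install compatible with the 4.10.x version noted in config.json:

```python
import torch

# Unpickling requires transformers to be importable, since the file stores a
# transformers.TrainingArguments instance; treat this as a best-effort peek.
args = torch.load("training_args.bin")
print(args.num_train_epochs, args.learning_rate)
```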