AlekseyKorshuk commited on
Commit
ff26826
1 Parent(s): 0a238b6

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/oxxxymiron")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/if83r8sb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/s58wdkfz) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/s58wdkfz/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/oxxxymiron")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/e254c9iz/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1ggk9c4z) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1ggk9c4z/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -37,7 +37,7 @@
37
  }
38
  },
39
  "torch_dtype": "float32",
40
- "transformers_version": "4.15.0",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
37
  }
38
  },
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.20.1",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.3110859394073486, "eval_runtime": 18.5351, "eval_samples_per_second": 20.825, "eval_steps_per_second": 2.644, "epoch": 26.0}
1
+ {"eval_loss": 1.2792030572891235, "eval_runtime": 11.6667, "eval_samples_per_second": 44.057, "eval_steps_per_second": 5.571, "epoch": 27.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf04b5cb92f0bb037efcff6106efd6559eadeb2c169561274b38adf7974d3cbd
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7199341f4dd28233f5a2f7a57d1173f1de5ec3e5c9bda47eff103b938f2f53f0
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89bd3fcc9080941291eb43d62d53800524426bd1841ce2e59ad94d37b402ee47
3
  size 995604017
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09ade92b331627aa5e617dc79f94f4950c20fcb91cc9c206ab7b3fc6ff328ab
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b078eadcf9ca889f51cd804d94e22e90d64d6befb42b2adc89663598895f61d
3
- size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3419aa695794916220adfc7bc1a0a22a3a14811601f48076c9c4970c4e59123e
3
+ size 510396521
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c93f956bb61c562ef5de6ecf68f0acb1e428bb13e97ddfd9eb68fef96714fcef
3
  size 14439
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43de655d52a19502409ebbe70eea7706f630b0bef1a4871e806cca13d3c53b94
3
  size 14439
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40aa28790758cc10852db2e41872a7c552d32089589ee1f68283d1cfcd6cf1da
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf6862649037771cabc79d7217ab41990edef62364ec82c3e0d0afea35db8b9b
3
  size 623
special_tokens_map.json CHANGED
@@ -1 +1,5 @@
1
- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1,10 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/oxxxymiron", "tokenizer_class": "GPT2Tokenizer"}
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "model_max_length": 1024,
6
+ "name_or_path": "huggingartists/oxxxymiron",
7
+ "special_tokens_map_file": null,
8
+ "tokenizer_class": "GPT2Tokenizer",
9
+ "unk_token": "<|endoftext|>"
10
+ }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.3110859394073486,
3
- "best_model_checkpoint": "output/oxxxymiron/checkpoint-4573",
4
- "epoch": 17.0,
5
- "global_step": 4573,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5514,11 +5514,301 @@
5514
  "eval_samples_per_second": 20.855,
5515
  "eval_steps_per_second": 2.647,
5516
  "step": 4573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5517
  }
5518
  ],
5519
- "max_steps": 6994,
5520
- "num_train_epochs": 26,
5521
- "total_flos": 4776548990976000.0,
5522
  "trial_name": null,
5523
  "trial_params": null
5524
  }
1
  {
2
+ "best_metric": 1.2792030572891235,
3
+ "best_model_checkpoint": "output/oxxxymiron/checkpoint-4807",
4
+ "epoch": 19.0,
5
+ "global_step": 4807,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
5514
  "eval_samples_per_second": 20.855,
5515
  "eval_steps_per_second": 2.647,
5516
  "step": 4573
5517
+ },
5518
+ {
5519
+ "epoch": 18.08,
5520
+ "learning_rate": 0.000134880848712477,
5521
+ "loss": 1.3191,
5522
+ "step": 4575
5523
+ },
5524
+ {
5525
+ "epoch": 18.1,
5526
+ "learning_rate": 0.00013365575351388775,
5527
+ "loss": 1.4082,
5528
+ "step": 4580
5529
+ },
5530
+ {
5531
+ "epoch": 18.12,
5532
+ "learning_rate": 0.00013217996375537754,
5533
+ "loss": 1.381,
5534
+ "step": 4585
5535
+ },
5536
+ {
5537
+ "epoch": 18.14,
5538
+ "learning_rate": 0.0001304591664429994,
5539
+ "loss": 1.3937,
5540
+ "step": 4590
5541
+ },
5542
+ {
5543
+ "epoch": 18.16,
5544
+ "learning_rate": 0.00012849999272775362,
5545
+ "loss": 1.3955,
5546
+ "step": 4595
5547
+ },
5548
+ {
5549
+ "epoch": 18.18,
5550
+ "learning_rate": 0.000126309992352219,
5551
+ "loss": 1.3851,
5552
+ "step": 4600
5553
+ },
5554
+ {
5555
+ "epoch": 18.2,
5556
+ "learning_rate": 0.00012389760455736593,
5557
+ "loss": 1.3328,
5558
+ "step": 4605
5559
+ },
5560
+ {
5561
+ "epoch": 18.22,
5562
+ "learning_rate": 0.00012127212556165209,
5563
+ "loss": 1.3809,
5564
+ "step": 4610
5565
+ },
5566
+ {
5567
+ "epoch": 18.24,
5568
+ "learning_rate": 0.00011844367273772787,
5569
+ "loss": 1.2981,
5570
+ "step": 4615
5571
+ },
5572
+ {
5573
+ "epoch": 18.26,
5574
+ "learning_rate": 0.00011542314562479984,
5575
+ "loss": 1.4094,
5576
+ "step": 4620
5577
+ },
5578
+ {
5579
+ "epoch": 18.28,
5580
+ "learning_rate": 0.00011222218392688052,
5581
+ "loss": 1.4044,
5582
+ "step": 4625
5583
+ },
5584
+ {
5585
+ "epoch": 18.3,
5586
+ "learning_rate": 0.0001088531226587985,
5587
+ "loss": 1.4849,
5588
+ "step": 4630
5589
+ },
5590
+ {
5591
+ "epoch": 18.32,
5592
+ "learning_rate": 0.00010532894461279404,
5593
+ "loss": 1.4488,
5594
+ "step": 4635
5595
+ },
5596
+ {
5597
+ "epoch": 18.34,
5598
+ "learning_rate": 0.00010166323032888931,
5599
+ "loss": 1.4335,
5600
+ "step": 4640
5601
+ },
5602
+ {
5603
+ "epoch": 18.36,
5604
+ "learning_rate": 9.78701057618181e-05,
5605
+ "loss": 1.3215,
5606
+ "step": 4645
5607
+ },
5608
+ {
5609
+ "epoch": 18.38,
5610
+ "learning_rate": 9.396418784617256e-05,
5611
+ "loss": 1.4931,
5612
+ "step": 4650
5613
+ },
5614
+ {
5615
+ "epoch": 18.4,
5616
+ "learning_rate": 8.996052816955526e-05,
5617
+ "loss": 1.4301,
5618
+ "step": 4655
5619
+ },
5620
+ {
5621
+ "epoch": 18.42,
5622
+ "learning_rate": 8.587455497076757e-05,
5623
+ "loss": 1.3555,
5624
+ "step": 4660
5625
+ },
5626
+ {
5627
+ "epoch": 18.44,
5628
+ "learning_rate": 8.172201368657088e-05,
5629
+ "loss": 1.3862,
5630
+ "step": 4665
5631
+ },
5632
+ {
5633
+ "epoch": 18.46,
5634
+ "learning_rate": 7.751890627611039e-05,
5635
+ "loss": 1.3795,
5636
+ "step": 4670
5637
+ },
5638
+ {
5639
+ "epoch": 18.48,
5640
+ "learning_rate": 7.328142955681618e-05,
5641
+ "loss": 1.4168,
5642
+ "step": 4675
5643
+ },
5644
+ {
5645
+ "epoch": 18.5,
5646
+ "learning_rate": 6.902591278942331e-05,
5647
+ "loss": 1.4594,
5648
+ "step": 4680
5649
+ },
5650
+ {
5651
+ "epoch": 18.52,
5652
+ "learning_rate": 6.47687547526032e-05,
5653
+ "loss": 1.4803,
5654
+ "step": 4685
5655
+ },
5656
+ {
5657
+ "epoch": 18.54,
5658
+ "learning_rate": 6.0526360549714816e-05,
5659
+ "loss": 1.4239,
5660
+ "step": 4690
5661
+ },
5662
+ {
5663
+ "epoch": 18.56,
5664
+ "learning_rate": 5.6315078391183605e-05,
5665
+ "loss": 1.3304,
5666
+ "step": 4695
5667
+ },
5668
+ {
5669
+ "epoch": 18.58,
5670
+ "learning_rate": 5.21511365961095e-05,
5671
+ "loss": 1.3828,
5672
+ "step": 4700
5673
+ },
5674
+ {
5675
+ "epoch": 18.6,
5676
+ "learning_rate": 4.80505810558948e-05,
5677
+ "loss": 1.3273,
5678
+ "step": 4705
5679
+ },
5680
+ {
5681
+ "epoch": 18.62,
5682
+ "learning_rate": 4.402921340084794e-05,
5683
+ "loss": 1.3661,
5684
+ "step": 4710
5685
+ },
5686
+ {
5687
+ "epoch": 18.64,
5688
+ "learning_rate": 4.0102530108070474e-05,
5689
+ "loss": 1.287,
5690
+ "step": 4715
5691
+ },
5692
+ {
5693
+ "epoch": 18.66,
5694
+ "learning_rate": 3.6285662785250574e-05,
5695
+ "loss": 1.3865,
5696
+ "step": 4720
5697
+ },
5698
+ {
5699
+ "epoch": 18.68,
5700
+ "learning_rate": 3.2593319860498044e-05,
5701
+ "loss": 1.428,
5702
+ "step": 4725
5703
+ },
5704
+ {
5705
+ "epoch": 18.7,
5706
+ "learning_rate": 2.9039729902920295e-05,
5707
+ "loss": 1.2403,
5708
+ "step": 4730
5709
+ },
5710
+ {
5711
+ "epoch": 18.72,
5712
+ "learning_rate": 2.5638586792340877e-05,
5713
+ "loss": 1.4223,
5714
+ "step": 4735
5715
+ },
5716
+ {
5717
+ "epoch": 18.74,
5718
+ "learning_rate": 2.2402996949474048e-05,
5719
+ "loss": 1.3913,
5720
+ "step": 4740
5721
+ },
5722
+ {
5723
+ "epoch": 18.75,
5724
+ "learning_rate": 1.9345428829881034e-05,
5725
+ "loss": 1.3764,
5726
+ "step": 4745
5727
+ },
5728
+ {
5729
+ "epoch": 18.77,
5730
+ "learning_rate": 1.647766487635479e-05,
5731
+ "loss": 1.4167,
5732
+ "step": 4750
5733
+ },
5734
+ {
5735
+ "epoch": 18.79,
5736
+ "learning_rate": 1.3810756114877466e-05,
5737
+ "loss": 1.3081,
5738
+ "step": 4755
5739
+ },
5740
+ {
5741
+ "epoch": 18.81,
5742
+ "learning_rate": 1.1354979569111334e-05,
5743
+ "loss": 1.3206,
5744
+ "step": 4760
5745
+ },
5746
+ {
5747
+ "epoch": 18.83,
5748
+ "learning_rate": 9.119798657542995e-06,
5749
+ "loss": 1.3369,
5750
+ "step": 4765
5751
+ },
5752
+ {
5753
+ "epoch": 18.85,
5754
+ "learning_rate": 7.113826725875128e-06,
5755
+ "loss": 1.3328,
5756
+ "step": 4770
5757
+ },
5758
+ {
5759
+ "epoch": 18.87,
5760
+ "learning_rate": 5.344793855206173e-06,
5761
+ "loss": 1.4008,
5762
+ "step": 4775
5763
+ },
5764
+ {
5765
+ "epoch": 18.89,
5766
+ "learning_rate": 3.819517073901737e-06,
5767
+ "loss": 1.41,
5768
+ "step": 4780
5769
+ },
5770
+ {
5771
+ "epoch": 18.91,
5772
+ "learning_rate": 2.5438740879408957e-06,
5773
+ "loss": 1.1899,
5774
+ "step": 4785
5775
+ },
5776
+ {
5777
+ "epoch": 18.93,
5778
+ "learning_rate": 1.522780630978951e-06,
5779
+ "loss": 1.3401,
5780
+ "step": 4790
5781
+ },
5782
+ {
5783
+ "epoch": 18.95,
5784
+ "learning_rate": 7.601715213983543e-07,
5785
+ "loss": 1.3232,
5786
+ "step": 4795
5787
+ },
5788
+ {
5789
+ "epoch": 18.97,
5790
+ "learning_rate": 2.5898549935329754e-07,
5791
+ "loss": 1.3369,
5792
+ "step": 4800
5793
+ },
5794
+ {
5795
+ "epoch": 18.99,
5796
+ "learning_rate": 2.1153902234608112e-08,
5797
+ "loss": 1.4171,
5798
+ "step": 4805
5799
+ },
5800
+ {
5801
+ "epoch": 19.0,
5802
+ "eval_loss": 1.2792030572891235,
5803
+ "eval_runtime": 11.7078,
5804
+ "eval_samples_per_second": 43.902,
5805
+ "eval_steps_per_second": 5.552,
5806
+ "step": 4807
5807
  }
5808
  ],
5809
+ "max_steps": 6831,
5810
+ "num_train_epochs": 27,
5811
+ "total_flos": 5020465102848000.0,
5812
  "trial_name": null,
5813
  "trial_params": null
5814
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c111611d0270c2f70ae5a43dbcc59025e6535cde71a5fbf71689de64f457d265
3
- size 2991
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c8a04c78ae405c90d321579f04eceaba457a750930ce8d60249e0bee7dbd178
3
+ size 3311