AlekseyKorshuk committed
Commit ba76bd3
1 Parent(s): 273af26

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
  dataset = load_dataset("huggingartists/coldplay")
  ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1gzc0ns4/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/399heq03/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
  ## Training procedure
 
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Coldplay's lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/22vgjn8r) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3o6fr9bq) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/22vgjn8r/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3o6fr9bq/artifacts) is logged and versioned.
 
  ## How to use
 
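The README edits above only swap the W&B run links; the rest of the card is unchanged. For orientation, here is a minimal usage sketch combining the card's `load_dataset` snippet with the fine-tuned checkpoint it documents (the prompt and generation settings are illustrative, not taken from the card):

```python
from datasets import load_dataset
from transformers import pipeline

# Lyrics dataset referenced in the README snippet above
dataset = load_dataset("huggingartists/coldplay")

# The fine-tuned GPT-2 checkpoint lives under the same repo id
generator = pipeline("text-generation", model="huggingartists/coldplay")
print(generator("I am", max_length=50, num_return_sequences=1)[0]["generated_text"])
```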
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 1.5336766242980957, "eval_runtime": 2.7958, "eval_samples_per_second": 21.461, "eval_steps_per_second": 2.861, "epoch": 9.0}
+ {"eval_loss": 1.2095028162002563, "eval_runtime": 3.4191, "eval_samples_per_second": 21.643, "eval_steps_per_second": 2.925, "epoch": 19.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d06678e316f0244aae1a5d5def2e8ab05ebd44699021c19e283ec3d9b3eb7019
+ oid sha256:a8806c168019cbc1ae2ffed46c832caa9529a17675216fdd1c782cc1176e7331
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1f2f3a899d9271926fe05e74d2e1e78a5a284bbd2a99b96fa49ccf0da030a8c8
+ oid sha256:37ac05510aa6c132c3344e1ec9d0f1e37c7031d848f40767d745da3aa0e66a9b
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:74f16ce366fea146ff0b814bbb0ee1c52aa946330b16cade31e7d9eb5550b66a
+ oid sha256:0a7158419658b030a3d8570b711350a55f73d17342aa9920b8e41aff9c03d4a1
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:88fb4b11febc18ef05627a88f778f7e276b66959c10157ee7e09d9fd08d92c2e
- size 14567
+ oid sha256:1096f6f012654cb7b05c9d15f0c8c81dff6f30d1dbde8cf0b9489e06f1331a53
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8b5e135d5638eeea038dbe56656468198b1ccb302b58ca9cca115146d8a6edf2
+ oid sha256:b9430296501fbe754e360495552f3a391e4c1ba2be1a491d6867af0670e74711
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
- "best_metric": 1.5336766242980957,
- "best_model_checkpoint": "output/coldplay/checkpoint-450",
- "epoch": 9.0,
- "global_step": 450,
+ "best_metric": 1.2095028162002563,
+ "best_model_checkpoint": "output/coldplay/checkpoint-528",
+ "epoch": 11.0,
+ "global_step": 528,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -626,11 +626,117 @@
  "eval_samples_per_second": 22.279,
  "eval_steps_per_second": 2.971,
  "step": 450
+ },
+ {
+ "epoch": 9.48,
+ "learning_rate": 6.411334533481208e-05,
+ "loss": 1.183,
+ "step": 455
+ },
+ {
+ "epoch": 9.58,
+ "learning_rate": 8.635498649403306e-05,
+ "loss": 1.514,
+ "step": 460
+ },
+ {
+ "epoch": 9.69,
+ "learning_rate": 0.00010671211798514463,
+ "loss": 1.3301,
+ "step": 465
+ },
+ {
+ "epoch": 9.79,
+ "learning_rate": 0.0001230240391439787,
+ "loss": 1.5153,
+ "step": 470
+ },
+ {
+ "epoch": 9.9,
+ "learning_rate": 0.0001335594068833643,
+ "loss": 1.3648,
+ "step": 475
+ },
+ {
+ "epoch": 10.0,
+ "learning_rate": 0.0001372,
+ "loss": 1.3593,
+ "step": 480
+ },
+ {
+ "epoch": 10.0,
+ "eval_loss": 1.2220196723937988,
+ "eval_runtime": 3.1642,
+ "eval_samples_per_second": 23.386,
+ "eval_steps_per_second": 3.16,
+ "step": 480
+ },
+ {
+ "epoch": 10.1,
+ "learning_rate": 0.00013355940688336435,
+ "loss": 1.2865,
+ "step": 485
+ },
+ {
+ "epoch": 10.21,
+ "learning_rate": 0.00012302403914397878,
+ "loss": 1.315,
+ "step": 490
+ },
+ {
+ "epoch": 10.31,
+ "learning_rate": 0.00010671211798514499,
+ "loss": 1.237,
+ "step": 495
+ },
+ {
+ "epoch": 10.42,
+ "learning_rate": 8.635498649403298e-05,
+ "loss": 1.1549,
+ "step": 500
+ },
+ {
+ "epoch": 10.52,
+ "learning_rate": 6.4113345334812e-05,
+ "loss": 1.3219,
+ "step": 505
+ },
+ {
+ "epoch": 10.62,
+ "learning_rate": 4.23479165397549e-05,
+ "loss": 1.2553,
+ "step": 510
+ },
+ {
+ "epoch": 10.73,
+ "learning_rate": 2.3368877084135498e-05,
+ "loss": 1.1603,
+ "step": 515
+ },
+ {
+ "epoch": 10.83,
+ "learning_rate": 9.190657300387535e-06,
+ "loss": 1.2085,
+ "step": 520
+ },
+ {
+ "epoch": 10.94,
+ "learning_rate": 1.3181297643384459e-06,
+ "loss": 1.2453,
+ "step": 525
+ },
+ {
+ "epoch": 11.0,
+ "eval_loss": 1.2095028162002563,
+ "eval_runtime": 3.297,
+ "eval_samples_per_second": 22.445,
+ "eval_steps_per_second": 3.033,
+ "step": 528
  }
  ],
- "max_steps": 450,
- "num_train_epochs": 9,
- "total_flos": 468235321344000.0,
+ "max_steps": 912,
+ "num_train_epochs": 19,
+ "total_flos": 549235851264000.0,
  "trial_name": null,
  "trial_params": null
  }
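Beyond the updated summary fields (best_metric, max_steps, num_train_epochs, total_flos), the new log entries record a cyclic learning rate that climbs to about 1.37e-4 around step 480 and decays to roughly 1.3e-6 by step 525. A small sketch for pulling those curves back out of the file, assuming the standard Hugging Face Trainer state layout (the entries above live under a "log_history" list):

```python
import json

# Load the Trainer state checkpointed alongside the model weights
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"], state["best_model_checkpoint"])

# Training steps carry "learning_rate"/"loss"; evaluation points carry "eval_loss"
train_logs = [e for e in state["log_history"] if "learning_rate" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_logs:
    print(f'epoch {e["epoch"]}: eval_loss {e["eval_loss"]:.4f}')

# Learning-rate schedule per logged step
lr_curve = [(e["step"], e["learning_rate"]) for e in train_logs]
```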
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b8b49270cf7d7730cd41580e35a9163dd952c0d60570cdecffcd61412186ef48
+ oid sha256:8634fa1cc14205deda566a1ce3471c09a00adb32b1ca2deb15e6602eb34457d2
  size 2863