huggingartists

Browse files

Files changed (11) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
tokenizer_config.json +1 -1
trainer_state.json +341 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/50-cent")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/20hup4zs/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3mjwl9eq) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3mjwl9eq/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/50-cent")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1awg3ygb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "huggingartists/50-cent",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 3.57045841217041, "eval_runtime": 16.4515, "eval_samples_per_second": 22.49, "eval_steps_per_second": 2.857, "epoch": 1.0}


1	+ {"eval_loss": 3.3637726306915283, "eval_runtime": 16.4999, "eval_samples_per_second": 20.727, "eval_steps_per_second": 2.606, "epoch": 2.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9827d417fb0de468e6fcc87474696d7324c62371704595e5db2b86387e430d4f
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:358a6f342b56f9de0136e070ea234ddaa8e96700df6535caddb90f83533ad73a
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcd47e8928507e9fd970c4e3f0d0f823da8245285e0b0218df26a4d69fea66c0
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:54d8c0438e44a5c817661dde8d80235759dd9bf6fd31e60b82390ac67a903164
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba9de888594396c86e2cf894c62dd4e48624d116cf4c441b6dd1a69c5b7e02c9
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:39028db0b9207c67d95d8bc7b1634c498f86342adff06c8b1c9d2d4614a45efe
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92dbfd343ee86788257a9f9b700923ca652db2e930c8f12766afb964d94ec657
-size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1962bee387c556be33f8635e4ecc47b5f5d760f1525f5cf294369a7e3bac39b
+size 14439

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f73ccb458a3740b64f04a91bff674b6289e4a70414fadf4a36dcb72344df060
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:56a03ee9c4f7d46c39bb609d5652834cf98daba2d29c096120b392bb7cba62b1
 size 623

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/50-cent", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 3.57045841217041,
-  "best_model_checkpoint": "output/50-cent/checkpoint-258",
-  "epoch": 1.0,
-  "global_step": 258,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -320,11 +320,345 @@
       "eval_samples_per_second": 22.47,
       "eval_steps_per_second": 2.854,
       "step": 258
     }
   ],
-  "max_steps": 258,
-  "num_train_epochs": 1,
-  "total_flos": 268869500928000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 3.3637726306915283,
+  "best_model_checkpoint": "output/50-cent/checkpoint-522",
+  "epoch": 2.0,
+  "global_step": 522,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.47,
       "eval_steps_per_second": 2.854,
       "step": 258
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.969441783384187e-09,
+      "loss": 3.3342,
+      "step": 260
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 3.3782756328582764,
+      "eval_runtime": 16.6767,
+      "eval_samples_per_second": 20.508,
+      "eval_steps_per_second": 2.578,
+      "step": 261
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 7.949666974663345e-08,
+      "loss": 3.4715,
+      "step": 265
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 4.0213613921093164e-07,
+      "loss": 3.4357,
+      "step": 270
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 9.717195750166447e-07,
+      "loss": 3.3402,
+      "step": 275
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 1.7861845197078197e-06,
+      "loss": 3.395,
+      "step": 280
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 2.842581801675534e-06,
+      "loss": 3.3568,
+      "step": 285
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 4.137086214086682e-06,
+      "loss": 3.6921,
+      "step": 290
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 5.66501036593004e-06,
+      "loss": 3.4689,
+      "step": 295
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 7.420821655024756e-06,
+      "loss": 3.5794,
+      "step": 300
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 9.39816230153247e-06,
+      "loss": 3.4054,
+      "step": 305
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.1589872369431459e-05,
+      "loss": 3.5207,
+      "step": 310
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 1.3988015692592823e-05,
+      "loss": 3.352,
+      "step": 315
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 1.658390861157988e-05,
+      "loss": 3.5263,
+      "step": 320
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 1.936815141711555e-05,
+      "loss": 3.4251,
+      "step": 325
+    },
+    {
+      "epoch": 1.26,
+      "learning_rate": 2.2330662386360735e-05,
+      "loss": 3.5036,
+      "step": 330
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 2.5460714288759305e-05,
+      "loss": 3.6056,
+      "step": 335
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 2.8746973229261208e-05,
+      "loss": 3.4339,
+      "step": 340
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 3.2177539688273746e-05,
+      "loss": 3.2688,
+      "step": 345
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 3.5739991609734934e-05,
+      "loss": 3.6447,
+      "step": 350
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 3.9421429381287695e-05,
+      "loss": 3.4024,
+      "step": 355
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 4.320852254368187e-05,
+      "loss": 3.332,
+      "step": 360
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 4.7087558060269536e-05,
+      "loss": 3.4638,
+      "step": 365
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 5.1044489971810725e-05,
+      "loss": 3.5294,
+      "step": 370
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 5.506499025678891e-05,
+      "loss": 3.4554,
+      "step": 375
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 5.9134500713072235e-05,
+      "loss": 3.2164,
+      "step": 380
+    },
+    {
+      "epoch": 1.48,
+      "learning_rate": 6.323828567305678e-05,
+      "loss": 3.5425,
+      "step": 385
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 6.736148536141151e-05,
+      "loss": 3.3742,
+      "step": 390
+    },
+    {
+      "epoch": 1.51,
+      "learning_rate": 7.148916970221591e-05,
+      "loss": 3.4436,
+      "step": 395
+    },
+    {
+      "epoch": 1.53,
+      "learning_rate": 7.560639238065579e-05,
+      "loss": 3.4376,
+      "step": 400
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 7.969824496351964e-05,
+      "loss": 3.5896,
+      "step": 405
+    },
+    {
+      "epoch": 1.57,
+      "learning_rate": 8.374991088252677e-05,
+      "loss": 3.4729,
+      "step": 410
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 8.774671908501242e-05,
+      "loss": 3.4136,
+      "step": 415
+    },
+    {
+      "epoch": 1.61,
+      "learning_rate": 9.1674197157702e-05,
+      "loss": 3.475,
+      "step": 420
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 9.551812373121417e-05,
+      "loss": 3.3729,
+      "step": 425
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 9.926457997553504e-05,
+      "loss": 3.5851,
+      "step": 430
+    },
+    {
+      "epoch": 1.67,
+      "learning_rate": 0.00010290000000000001,
+      "loss": 3.4967,
+      "step": 435
+    },
+    {
+      "epoch": 1.69,
+      "learning_rate": 0.0001064112199752845,
+      "loss": 3.3427,
+      "step": 440
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 0.0001097855257995339,
+      "loss": 3.5208,
+      "step": 445
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 0.00011301069913603334,
+      "loss": 3.4049,
+      "step": 450
+    },
+    {
+      "epoch": 1.74,
+      "learning_rate": 0.00011607506165571554,
+      "loss": 3.452,
+      "step": 455
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 0.00011896751732430487,
+      "loss": 3.5148,
+      "step": 460
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 0.00012167759258097654,
+      "loss": 3.3147,
+      "step": 465
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 0.00012419547426304373,
+      "loss": 3.38,
+      "step": 470
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 0.00012651204513934757,
+      "loss": 3.3329,
+      "step": 475
+    },
+    {
+      "epoch": 1.84,
+      "learning_rate": 0.00012861891692368509,
+      "loss": 3.5188,
+      "step": 480
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 0.00013050846064873163,
+      "loss": 3.4628,
+      "step": 485
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 0.0001321738342904763,
+      "loss": 3.5377,
+      "step": 490
+    },
+    {
+      "epoch": 1.9,
+      "learning_rate": 0.00013360900754314024,
+      "loss": 3.4683,
+      "step": 495
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 0.00013480878365487042,
+      "loss": 3.4532,
+      "step": 500
+    },
+    {
+      "epoch": 1.93,
+      "learning_rate": 0.00013576881824513962,
+      "loss": 3.5582,
+      "step": 505
+    },
+    {
+      "epoch": 1.95,
+      "learning_rate": 0.00013648563503571674,
+      "loss": 3.5528,
+      "step": 510
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 0.00013695663843824482,
+      "loss": 3.4235,
+      "step": 515
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 0.00013718012295284757,
+      "loss": 3.5739,
+      "step": 520
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 3.3637726306915283,
+      "eval_runtime": 16.4835,
+      "eval_samples_per_second": 20.748,
+      "eval_steps_per_second": 2.609,
+      "step": 522
     }
   ],
+  "max_steps": 522,
+  "num_train_epochs": 2,
+  "total_flos": 544271302656000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0338d38146cc715ce37a1415c1593b31576abfbd8eef8f32a19e914cd42011fa
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6f290bf43c68f84d50f113a11b20c55809e90aac8aa7e58b408f5dbe3f578ab
 size 2671