huggingartists

Browse files

Files changed (13) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
special_tokens_map.json +5 -1
tokenizer.json +8 -5
tokenizer_config.json +10 -1
trainer_state.json +371 -7
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/taylor-swift")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11hietbj/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Taylor Swift's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/jwz5zda0) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/jwz5zda0/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/taylor-swift")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2l84tzp2/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Taylor Swift's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1hy7aa65) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1hy7aa65/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -36,7 +36,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.16.2",
   "use_cache": true,
   "vocab_size": 50257
 }

     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.20.1",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.4377235174179077, "eval_runtime": 9.5718, "eval_samples_per_second": 20.895, "eval_steps_per_second": 2.612, "epoch": 7.0}


1	+ {"eval_loss": 1.3240652084350586, "eval_runtime": 5.3381, "eval_samples_per_second": 42.712, "eval_steps_per_second": 5.433, "epoch": 12.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76ae138578ec229ab199e35e58682cc0818297990880e98b41bb7caca21799bb
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0193dabaedc886d2423381b1d4991d098c388ec01408d24cb8525d1c5031eb2
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd96b2ecf6961b37a51c11488af2110f05d27248a9d735d9cec37797c7c45cbe
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:2fc77d09c4c454b54d1ca6720bc643fbbdc7dae8b9f46f9dfbe190c61e89c49b
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a05cf58e9ce70f6bc6054d004220f5b59634b254e9d1c7c20c77dd1d160dacdc
-size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e35f64d133e2da786674123c81f13fa6ce6316d7e8029fc026e6c67f01baf8b
+size 510396521

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0917e5420fc90d80d96ba9647582ee18ea3c8fd35cfb2e8cff174e79d4c678c0
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:664be080bd3dc652f628b2caf10ec5043081add1f81d7f2e4ab19f1da66a9fd6
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:696255b08bfcc0a618196cb4d29aaed44996ae47e5ca36d14f60bf60ece9f170
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b79408328cb94b5f58b90ffdd8b7127d5992674b5e2cf7df0005f8718053694
 size 623

special_tokens_map.json CHANGED Viewed

	@@ -1 +1,5 @@
1	- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json CHANGED Viewed

@@ -5,29 +5,32 @@
   "added_tokens": [
     {
       "id": 50256,
-      "special": true,
       "content": "<|endoftext|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     }
   ],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
-    "trim_offsets": true
   },
   "post_processor": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": false
   },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": true
   },
   "model": {
     "type": "BPE",

   "added_tokens": [
     {
       "id": 50256,
       "content": "<|endoftext|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
+    "trim_offsets": true,
+    "use_regex": true
   },
   "post_processor": {
     "type": "ByteLevel",
     "add_prefix_space": true,
+    "trim_offsets": false,
+    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
+    "trim_offsets": true,
+    "use_regex": true
   },
   "model": {
     "type": "BPE",

tokenizer_config.json CHANGED Viewed

	@@ -1 +1,10 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/taylor-swift", "tokenizer_class": "GPT2Tokenizer"}

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "huggingartists/taylor-swift",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.4377235174179077,
-  "best_model_checkpoint": "output/taylor-swift/checkpoint-1120",
-  "epoch": 7.0,
-  "global_step": 1120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1414,11 +1414,375 @@
       "eval_samples_per_second": 21.045,
       "eval_steps_per_second": 2.631,
       "step": 1120
     }
   ],
-  "max_steps": 1120,
-  "num_train_epochs": 7,
-  "total_flos": 1168759259136000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.3240652084350586,
+  "best_model_checkpoint": "output/taylor-swift/checkpoint-1413",
+  "epoch": 9.0,
+  "global_step": 1413,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.045,
       "eval_steps_per_second": 2.631,
       "step": 1120
+    },
+    {
+      "epoch": 7.17,
+      "learning_rate": 9.076596574074994e-06,
+      "loss": 1.4269,
+      "step": 1125
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 1.2780475216607764e-05,
+      "loss": 1.559,
+      "step": 1130
+    },
+    {
+      "epoch": 7.23,
+      "learning_rate": 1.7042649548372873e-05,
+      "loss": 1.529,
+      "step": 1135
+    },
+    {
+      "epoch": 7.26,
+      "learning_rate": 2.182049015671568e-05,
+      "loss": 1.6687,
+      "step": 1140
+    },
+    {
+      "epoch": 7.29,
+      "learning_rate": 2.7066210038327817e-05,
+      "loss": 1.5638,
+      "step": 1145
+    },
+    {
+      "epoch": 7.32,
+      "learning_rate": 3.2727342555268683e-05,
+      "loss": 1.5554,
+      "step": 1150
+    },
+    {
+      "epoch": 7.36,
+      "learning_rate": 3.874726619575346e-05,
+      "loss": 1.317,
+      "step": 1155
+    },
+    {
+      "epoch": 7.39,
+      "learning_rate": 4.5065770891153554e-05,
+      "loss": 1.679,
+      "step": 1160
+    },
+    {
+      "epoch": 7.42,
+      "learning_rate": 5.161966022502662e-05,
+      "loss": 1.6683,
+      "step": 1165
+    },
+    {
+      "epoch": 7.45,
+      "learning_rate": 5.834338351099537e-05,
+      "loss": 1.4034,
+      "step": 1170
+    },
+    {
+      "epoch": 7.48,
+      "learning_rate": 6.516969141756308e-05,
+      "loss": 1.6013,
+      "step": 1175
+    },
+    {
+      "epoch": 7.52,
+      "learning_rate": 7.20303085824368e-05,
+      "loss": 1.5516,
+      "step": 1180
+    },
+    {
+      "epoch": 7.55,
+      "learning_rate": 7.885661648900452e-05,
+      "loss": 1.5248,
+      "step": 1185
+    },
+    {
+      "epoch": 7.58,
+      "learning_rate": 8.558033977497326e-05,
+      "loss": 1.3379,
+      "step": 1190
+    },
+    {
+      "epoch": 7.61,
+      "learning_rate": 9.213422910884634e-05,
+      "loss": 1.3994,
+      "step": 1195
+    },
+    {
+      "epoch": 7.64,
+      "learning_rate": 9.845273380424641e-05,
+      "loss": 1.4434,
+      "step": 1200
+    },
+    {
+      "epoch": 7.68,
+      "learning_rate": 0.00010447265744473122,
+      "loss": 1.6187,
+      "step": 1205
+    },
+    {
+      "epoch": 7.71,
+      "learning_rate": 0.00011013378996167208,
+      "loss": 1.5693,
+      "step": 1210
+    },
+    {
+      "epoch": 7.74,
+      "learning_rate": 0.00011537950984328424,
+      "loss": 1.4006,
+      "step": 1215
+    },
+    {
+      "epoch": 7.77,
+      "learning_rate": 0.00012015735045162704,
+      "loss": 1.7153,
+      "step": 1220
+    },
+    {
+      "epoch": 7.8,
+      "learning_rate": 0.0001244195247833922,
+      "loss": 1.5277,
+      "step": 1225
+    },
+    {
+      "epoch": 7.83,
+      "learning_rate": 0.00012812340342592494,
+      "loss": 1.5052,
+      "step": 1230
+    },
+    {
+      "epoch": 7.87,
+      "learning_rate": 0.0001312319409280581,
+      "loss": 1.548,
+      "step": 1235
+    },
+    {
+      "epoch": 7.9,
+      "learning_rate": 0.00013371404632128166,
+      "loss": 1.4896,
+      "step": 1240
+    },
+    {
+      "epoch": 7.93,
+      "learning_rate": 0.0001355448940853745,
+      "loss": 1.5506,
+      "step": 1245
+    },
+    {
+      "epoch": 7.96,
+      "learning_rate": 0.00013670617244827653,
+      "loss": 1.6902,
+      "step": 1250
+    },
+    {
+      "epoch": 7.99,
+      "learning_rate": 0.0001371862665367597,
+      "loss": 1.5112,
+      "step": 1255
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 1.352002739906311,
+      "eval_runtime": 5.5761,
+      "eval_samples_per_second": 40.889,
+      "eval_steps_per_second": 5.201,
+      "step": 1256
+    },
+    {
+      "epoch": 8.03,
+      "learning_rate": 0.00013698037454606005,
+      "loss": 1.3097,
+      "step": 1260
+    },
+    {
+      "epoch": 8.06,
+      "learning_rate": 0.0001360905557665658,
+      "loss": 1.5119,
+      "step": 1265
+    },
+    {
+      "epoch": 8.09,
+      "learning_rate": 0.00013452570998720767,
+      "loss": 1.2738,
+      "step": 1270
+    },
+    {
+      "epoch": 8.12,
+      "learning_rate": 0.00013230148848155559,
+      "loss": 1.2764,
+      "step": 1275
+    },
+    {
+      "epoch": 8.15,
+      "learning_rate": 0.00012944013746692,
+      "loss": 1.2347,
+      "step": 1280
+    },
+    {
+      "epoch": 8.18,
+      "learning_rate": 0.00012597027560214946,
+      "loss": 1.4534,
+      "step": 1285
+    },
+    {
+      "epoch": 8.22,
+      "learning_rate": 0.00012192660774954517,
+      "loss": 1.4193,
+      "step": 1290
+    },
+    {
+      "epoch": 8.25,
+      "learning_rate": 0.00011734957786379066,
+      "loss": 1.5185,
+      "step": 1295
+    },
+    {
+      "epoch": 8.28,
+      "learning_rate": 0.00011228496447963,
+      "loss": 1.5225,
+      "step": 1300
+    },
+    {
+      "epoch": 8.31,
+      "learning_rate": 0.0001067834228441478,
+      "loss": 1.8013,
+      "step": 1305
+    },
+    {
+      "epoch": 8.34,
+      "learning_rate": 0.00010089997827314661,
+      "loss": 1.4035,
+      "step": 1310
+    },
+    {
+      "epoch": 8.38,
+      "learning_rate": 9.469347579898059e-05,
+      "loss": 1.4431,
+      "step": 1315
+    },
+    {
+      "epoch": 8.41,
+      "learning_rate": 8.82259916143434e-05,
+      "loss": 1.3074,
+      "step": 1320
+    },
+    {
+      "epoch": 8.44,
+      "learning_rate": 8.1562212198643e-05,
+      "loss": 1.3587,
+      "step": 1325
+    },
+    {
+      "epoch": 8.47,
+      "learning_rate": 7.476878733681043e-05,
+      "loss": 1.5297,
+      "step": 1330
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 6.791366350152217e-05,
+      "loss": 1.2815,
+      "step": 1335
+    },
+    {
+      "epoch": 8.54,
+      "learning_rate": 6.106540426620946e-05,
+      "loss": 1.5607,
+      "step": 1340
+    },
+    {
+      "epoch": 8.57,
+      "learning_rate": 5.4292504545952105e-05,
+      "loss": 1.1765,
+      "step": 1345
+    },
+    {
+      "epoch": 8.6,
+      "learning_rate": 4.766270552507704e-05,
+      "loss": 1.5363,
+      "step": 1350
+    },
+    {
+      "epoch": 8.63,
+      "learning_rate": 4.124231712342338e-05,
+      "loss": 1.4107,
+      "step": 1355
+    },
+    {
+      "epoch": 8.66,
+      "learning_rate": 3.509555477782507e-05,
+      "loss": 1.4608,
+      "step": 1360
+    },
+    {
+      "epoch": 8.69,
+      "learning_rate": 2.928389717219465e-05,
+      "loss": 1.3783,
+      "step": 1365
+    },
+    {
+      "epoch": 8.73,
+      "learning_rate": 2.386547134005838e-05,
+      "loss": 1.2348,
+      "step": 1370
+    },
+    {
+      "epoch": 8.76,
+      "learning_rate": 1.889447128962836e-05,
+      "loss": 1.5068,
+      "step": 1375
+    },
+    {
+      "epoch": 8.79,
+      "learning_rate": 1.4420615966203568e-05,
+      "loss": 1.3345,
+      "step": 1380
+    },
+    {
+      "epoch": 8.82,
+      "learning_rate": 1.0488651973253671e-05,
+      "loss": 1.2429,
+      "step": 1385
+    },
+    {
+      "epoch": 8.85,
+      "learning_rate": 7.13790602586542e-06,
+      "loss": 1.3129,
+      "step": 1390
+    },
+    {
+      "epoch": 8.89,
+      "learning_rate": 4.401891612830206e-06,
+      "loss": 1.2724,
+      "step": 1395
+    },
+    {
+      "epoch": 8.92,
+      "learning_rate": 2.307973801450113e-06,
+      "loss": 1.3608,
+      "step": 1400
+    },
+    {
+      "epoch": 8.95,
+      "learning_rate": 8.770955376250992e-07,
+      "loss": 1.4328,
+      "step": 1405
+    },
+    {
+      "epoch": 8.98,
+      "learning_rate": 1.2356817870921626e-07,
+      "loss": 1.3714,
+      "step": 1410
+    },
+    {
+      "epoch": 9.0,
+      "eval_loss": 1.3240652084350586,
+      "eval_runtime": 5.3842,
+      "eval_samples_per_second": 42.346,
+      "eval_steps_per_second": 5.386,
+      "step": 1413
     }
   ],
+  "max_steps": 1884,
+  "num_train_epochs": 12,
+  "total_flos": 1473164476416000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ca623b70d31f90284e0c9f8a31f7cb178f04870d48cd416af0fe84dfe57c1e5
-size 3055

 version https://git-lfs.github.com/spec/v1
+oid sha256:c178f13ee8a4a4e92389f456bb271b331e24a800185642959a69b718d48e7169
+size 3311