AlekseyKorshuk committed
Commit adabd3c
1 Parent(s): 191a8f4

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/linkin-park")
 ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2b15vg01/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3mtr0u4z/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Linkin Park's lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/fto51dp4) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/fxn4brd6) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/fto51dp4/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/fxn4brd6/artifacts) is logged and versioned.
 
 ## How to use
 
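The "How to use" section referenced in this hunk presumably loads the checkpoint through the standard `transformers` text-generation pipeline. A minimal, hypothetical sketch (the prompt string and generation parameters are illustrative, not taken from this commit):

```python
from transformers import pipeline

# Hypothetical usage sketch: load the fine-tuned checkpoint from the Hub and
# sample a few continuations; the prompt is illustrative only.
generator = pipeline("text-generation", model="huggingartists/linkin-park")
outputs = generator("I tried so hard", do_sample=True, max_length=50, num_return_sequences=3)
for out in outputs:
    print(out["generated_text"])
```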
config.json CHANGED
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
   }
  },
  "torch_dtype": "float32",
- "transformers_version": "4.10.2",
+ "transformers_version": "4.12.2",
  "use_cache": true,
  "vocab_size": 50257
 }
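The config.json hunk adds two GPT-2 attention options (`reorder_and_upcast_attn`, `scale_attn_by_inverse_layer_idx`) that newer `transformers` releases serialize, and bumps the recorded `transformers_version` to 4.12.2. A small sketch of inspecting these fields after the update (expected values are the ones shown in the diff):

```python
from transformers import AutoConfig

# Sketch: load the updated config from the Hub and read the fields touched
# by this commit; expected values are taken from the hunk above.
config = AutoConfig.from_pretrained("huggingartists/linkin-park")
print(config.reorder_and_upcast_attn)          # False
print(config.scale_attn_by_inverse_layer_idx)  # False
print(config.transformers_version)             # "4.12.2"
```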
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 2.0318267345428467, "eval_runtime": 4.5697, "eval_samples_per_second": 21.008, "eval_steps_per_second": 2.626, "epoch": 3.0}
+ {"eval_loss": 1.7180215120315552, "eval_runtime": 4.7844, "eval_samples_per_second": 20.901, "eval_steps_per_second": 2.717, "epoch": 4.0}
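Training for a fourth epoch lowers `eval_loss` from about 2.032 to about 1.718. Assuming `eval_loss` is the mean token-level cross-entropy reported by the Trainer, the corresponding validation perplexity drops from roughly 7.6 to roughly 5.6:

```python
import math

# Sketch: perplexity is exp(mean cross-entropy), assuming the Trainer's usual
# causal-LM convention for eval_loss.
before, after = 2.0318267345428467, 1.7180215120315552
print(round(math.exp(before), 2))  # ~7.63
print(round(math.exp(after), 2))   # ~5.57
```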
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:960e645cfdb5661c8a5575898191f3f17c064dc742a6f5d1c1e623c5a21c709e
+ oid sha256:e788d77fe1b0e4b37310b71f47d838058cad60aa5aa7e9a144ead4b8432a2469
 size 497764120
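The weight files below are stored as Git LFS pointers (spec v1), so only the `oid sha256` and `size` lines change in the diff; the binary payload is addressed by that hash. A hypothetical check that a locally downloaded payload matches the new pointer:

```python
import hashlib

# Sketch: hash a locally resolved LFS payload and compare it with the new
# flax_model.msgpack oid recorded in this commit.
expected = "e788d77fe1b0e4b37310b71f47d838058cad60aa5aa7e9a144ead4b8432a2469"

h = hashlib.sha256()
with open("flax_model.msgpack", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == expected, "payload does not match the LFS pointer"
```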
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f8e18e8cd6f8f5633cbbd1851d80023da278856308536afdaf38cbfb154a8732
- size 995603825
+ oid sha256:9491f77c6075e808c2ec8cc10e543cb3dec8b4f1e719478904830c0bb572bb3d
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a568564c9d7a3f53afa0d2c83e7145788515ff74c6c201a2bf1f4cce94aecb72
+ oid sha256:db75cae56bb0856bb04b9fc712dfbdc2552084b04e1bca7596d8840a170bea72
 size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:623c33d8f94c376806d8d4f4fd33e1db6afbc9689cfdf6d4fc7c47fa87f23bdd
+ oid sha256:2ddf98a38c2c0ecfdce82ba131154c1ba8b98f75a392abba4114eefe1d934305
 size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:81c6de287d0aeee2c682cd2a01e5e01657f181e757ee594a86aa0945f540d0b1
+ oid sha256:007eb1dede0d4998d3ab2e2cd3723c770930c260860d14cb2ace888387dbf23d
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
- "best_metric": 2.0318267345428467,
- "best_model_checkpoint": "output/linkin-park/checkpoint-210",
- "epoch": 3.0,
- "global_step": 210,
+ "best_metric": 1.7180215120315552,
+ "best_model_checkpoint": "output/linkin-park/checkpoint-276",
+ "epoch": 4.0,
+ "global_step": 276,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -290,11 +290,97 @@
 "eval_samples_per_second": 21.081,
 "eval_steps_per_second": 2.635,
 "step": 210
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 4.500582770777967e-06,
+ "loss": 1.8704,
+ "step": 215
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 1.166986896886688e-05,
+ "loss": 1.6973,
+ "step": 220
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 2.177685437520025e-05,
+ "loss": 1.9541,
+ "step": 225
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 3.4300000000000014e-05,
+ "loss": 1.7097,
+ "step": 230
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 4.859308855339902e-05,
+ "loss": 1.8994,
+ "step": 235
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 6.391857044318346e-05,
+ "loss": 1.772,
+ "step": 240
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 7.948562268689874e-05,
+ "loss": 1.7301,
+ "step": 245
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 9.449095682862937e-05,
+ "loss": 2.1161,
+ "step": 250
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001081602700970798,
+ "loss": 1.876,
+ "step": 255
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00011978820084915117,
+ "loss": 1.9729,
+ "step": 260
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00012877472652481797,
+ "loss": 1.7309,
+ "step": 265
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013465612591205902,
+ "loss": 2.0053,
+ "step": 270
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013712890801216552,
+ "loss": 1.7362,
+ "step": 275
+ },
+ {
+ "epoch": 4.0,
+ "eval_loss": 1.7180215120315552,
+ "eval_runtime": 4.7996,
+ "eval_samples_per_second": 20.835,
+ "eval_steps_per_second": 2.709,
+ "step": 276
 }
 ],
- "max_steps": 210,
- "num_train_epochs": 3,
- "total_flos": 217264324608000.0,
+ "max_steps": 276,
+ "num_train_epochs": 4,
+ "total_flos": 286245421056000.0,
 "trial_name": null,
 "trial_params": null
 }
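The updated `trainer_state.json` now records checkpoint-276 (epoch 4, step 276) as the best checkpoint and appends the extra epoch's log entries to `log_history`. A minimal sketch of reading the headline fields back out, assuming the layout shown in the hunk above:

```python
import json

# Sketch: pull the summary fields from the updated trainer_state.json;
# expected values are the ones shown in the diff above.
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 1.7180215120315552
print(state["best_model_checkpoint"])  # output/linkin-park/checkpoint-276
print(state["global_step"])            # 276
print(state["num_train_epochs"])       # 4
```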
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:aada2de1d92f467f8f5669200ff659c45b17df03bf26159792e731f4a3fea9f2
- size 2671
+ oid sha256:a579222290c0d1711177447b62010d2079a583ef41b959a81fd08b749cf9458e
+ size 2863