huggingartists

Browse files

Files changed (10) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
trainer_state.json +167 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/morgenshtern")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/m9zgb2vd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/ys11x7xp) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/ys11x7xp/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/morgenshtern")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/p956kstb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1st5hxmj) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1st5hxmj/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -35,7 +35,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.0",
   "use_cache": true,
   "vocab_size": 50257
 }

     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.10.1",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.1428929567337036, "eval_runtime": 7.9754, "eval_samples_per_second": 21.441, "eval_steps_per_second": 2.758, "epoch": 12.0}


1	+ {"eval_loss": 1.0746197700500488, "eval_runtime": 7.0746, "eval_samples_per_second": 22.051, "eval_steps_per_second": 2.827, "epoch": 13.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:355b1113b602cec6460d0f66c7318d9309a2d51638c83d166b18454dad89bd5c
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:c647ce86d1b1a86724518a6df72da43aedb998658e398e70f58daf9a2243c2d2
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b59045dfc4ed4d02f26069b5cc05179d371e5fa764cb055d1a830e8c0382acd4
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0215949aa5275f39e7df7af91225d000e2c9646cf575dd56a76789468682a08
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dc5e091ec6cb7f165ce89c1bc5ddd005d9d36c5598145f3b40df6a6b9c583cb
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:b502aba936ef42281e40d36ce74875ad930f76b429614a069ad2a0cf5d62940e
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c63f55fe8fef4a486cd65797398129fb611c4447a861094695f459a188101b5d
-size 14631

 version https://git-lfs.github.com/spec/v1
+oid sha256:64f7ad53ae8cbda635558cf1458df6c46c041d6541dd1606dd52a4492e1dfbd5
+size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e825854d4daee9b3baa1369de3bcfdc0baa5bcd259634286f62b5236f6992a0f
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbd7e7d8b60f58c4a16b003557100be8552014654bb06fb1afc69d6829aafac8
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.1428929567337036,
-  "best_model_checkpoint": "output/morgenshtern/checkpoint-1140",
-  "epoch": 12.0,
-  "global_step": 1140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1494,11 +1494,171 @@
       "eval_samples_per_second": 21.48,
       "eval_steps_per_second": 2.764,
       "step": 1140
     }
   ],
-  "max_steps": 1140,
-  "num_train_epochs": 12,
-  "total_flos": 1182346444800000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.0746197700500488,
+  "best_model_checkpoint": "output/morgenshtern/checkpoint-1261",
+  "epoch": 13.0,
+  "global_step": 1261,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.48,
       "eval_steps_per_second": 2.764,
       "step": 1140
+    },
+    {
+      "epoch": 11.8,
+      "learning_rate": 0.00012461626728572456,
+      "loss": 1.2708,
+      "step": 1145
+    },
+    {
+      "epoch": 11.86,
+      "learning_rate": 0.000130268089438458,
+      "loss": 1.2385,
+      "step": 1150
+    },
+    {
+      "epoch": 11.91,
+      "learning_rate": 0.00013430626843929596,
+      "loss": 1.2608,
+      "step": 1155
+    },
+    {
+      "epoch": 11.96,
+      "learning_rate": 0.00013662513894413278,
+      "loss": 1.2132,
+      "step": 1160
+    },
+    {
+      "epoch": 12.0,
+      "eval_loss": 1.1082079410552979,
+      "eval_runtime": 6.721,
+      "eval_samples_per_second": 23.211,
+      "eval_steps_per_second": 2.976,
+      "step": 1164
+    },
+    {
+      "epoch": 12.01,
+      "learning_rate": 0.00013716402403652231,
+      "loss": 1.2701,
+      "step": 1165
+    },
+    {
+      "epoch": 12.06,
+      "learning_rate": 0.0001359088229352192,
+      "loss": 1.2274,
+      "step": 1170
+    },
+    {
+      "epoch": 12.11,
+      "learning_rate": 0.0001328923799634352,
+      "loss": 1.1978,
+      "step": 1175
+    },
+    {
+      "epoch": 12.16,
+      "learning_rate": 0.0001281936251251452,
+      "loss": 1.0879,
+      "step": 1180
+    },
+    {
+      "epoch": 12.22,
+      "learning_rate": 0.00012193550877662404,
+      "loss": 1.2841,
+      "step": 1185
+    },
+    {
+      "epoch": 12.27,
+      "learning_rate": 0.00011428178443580113,
+      "loss": 1.166,
+      "step": 1190
+    },
+    {
+      "epoch": 12.32,
+      "learning_rate": 0.0001054327239123201,
+      "loss": 1.2385,
+      "step": 1195
+    },
+    {
+      "epoch": 12.37,
+      "learning_rate": 9.561987687870095e-05,
+      "loss": 1.1758,
+      "step": 1200
+    },
+    {
+      "epoch": 12.42,
+      "learning_rate": 8.51000120067249e-05,
+      "loss": 1.1698,
+      "step": 1205
+    },
+    {
+      "epoch": 12.47,
+      "learning_rate": 7.414839820879227e-05,
+      "loss": 1.3722,
+      "step": 1210
+    },
+    {
+      "epoch": 12.53,
+      "learning_rate": 6.305160179120769e-05,
+      "loss": 1.1787,
+      "step": 1215
+    },
+    {
+      "epoch": 12.58,
+      "learning_rate": 5.209998799327507e-05,
+      "loss": 1.2814,
+      "step": 1220
+    },
+    {
+      "epoch": 12.63,
+      "learning_rate": 4.158012312129902e-05,
+      "loss": 1.2154,
+      "step": 1225
+    },
+    {
+      "epoch": 12.68,
+      "learning_rate": 3.176727608767987e-05,
+      "loss": 1.2798,
+      "step": 1230
+    },
+    {
+      "epoch": 12.73,
+      "learning_rate": 2.291821556419886e-05,
+      "loss": 1.2241,
+      "step": 1235
+    },
+    {
+      "epoch": 12.78,
+      "learning_rate": 1.5264491223375942e-05,
+      "loss": 1.1194,
+      "step": 1240
+    },
+    {
+      "epoch": 12.84,
+      "learning_rate": 9.006374874854777e-06,
+      "loss": 1.1957,
+      "step": 1245
+    },
+    {
+      "epoch": 12.89,
+      "learning_rate": 4.3076200365648044e-06,
+      "loss": 1.1553,
+      "step": 1250
+    },
+    {
+      "epoch": 12.94,
+      "learning_rate": 1.2911770647808012e-06,
+      "loss": 1.1644,
+      "step": 1255
+    },
+    {
+      "epoch": 12.99,
+      "learning_rate": 3.597596347767558e-08,
+      "loss": 1.1646,
+      "step": 1260
+    },
+    {
+      "epoch": 13.0,
+      "eval_loss": 1.0746197700500488,
+      "eval_runtime": 7.0141,
+      "eval_samples_per_second": 22.241,
+      "eval_steps_per_second": 2.851,
+      "step": 1261
     }
   ],
+  "max_steps": 1261,
+  "num_train_epochs": 13,
+  "total_flos": 1307244036096000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54fa34a299d69d7a3304670c0272e7c9b90a901c33b2a0c579d8b695f9d47857
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:869572ff71fc1f7eacca418acad1f3acfe2cf9117bc54dd7f9ecad9664ccf36f
 size 2671