AlekseyKorshuk commited on
Commit
191a8f4
1 Parent(s): 79fe28f

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/linkin-park")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1s6xt77z/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Linkin Park's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1i4je18p) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1i4je18p/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/linkin-park")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2b15vg01/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Linkin Park's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/fto51dp4) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/fto51dp4/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.248004913330078, "eval_runtime": 4.6644, "eval_samples_per_second": 20.796, "eval_steps_per_second": 2.787, "epoch": 2.0}
1
+ {"eval_loss": 2.0318267345428467, "eval_runtime": 4.5697, "eval_samples_per_second": 21.008, "eval_steps_per_second": 2.626, "epoch": 3.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:225d151b89706812d44a3b7db2ec9b070074c0f7ba7e9d0ffe24145cc0990adb
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:960e645cfdb5661c8a5575898191f3f17c064dc742a6f5d1c1e623c5a21c709e
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35bd95469cbf0ab080be66768e5242378073c1190b28bd15f2700cf295d526f6
3
  size 995603825
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e18e8cd6f8f5633cbbd1851d80023da278856308536afdaf38cbfb154a8732
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a3eb8db73abd6dfffd1f85d4cbb59cf5647bfbe01d3c02e21a5a72a96906a3a
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a568564c9d7a3f53afa0d2c83e7145788515ff74c6c201a2bf1f4cce94aecb72
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db7af3f0fec071276d1a4d8600845adbb6ac8e80806a422d4311d79890098c71
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:623c33d8f94c376806d8d4f4fd33e1db6afbc9689cfdf6d4fc7c47fa87f23bdd
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03b8c59abd7807eb31fbb6b182b880936157d534c8d17e22038fc44702f9354e
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c6de287d0aeee2c682cd2a01e5e01657f181e757ee594a86aa0945f540d0b1
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.248004913330078,
3
- "best_model_checkpoint": "output/linkin-park/checkpoint-140",
4
- "epoch": 2.0,
5
- "global_step": 140,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -198,11 +198,103 @@
198
  "eval_samples_per_second": 21.058,
199
  "eval_steps_per_second": 2.822,
200
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
- "max_steps": 140,
204
- "num_train_epochs": 2,
205
- "total_flos": 144625139712000.0,
206
  "trial_name": null,
207
  "trial_params": null
208
  }
1
  {
2
+ "best_metric": 2.0318267345428467,
3
+ "best_model_checkpoint": "output/linkin-park/checkpoint-210",
4
+ "epoch": 3.0,
5
+ "global_step": 210,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
198
  "eval_samples_per_second": 21.058,
199
  "eval_steps_per_second": 2.822,
200
  "step": 140
201
+ },
202
+ {
203
+ "epoch": 2.07,
204
+ "learning_rate": 0.0001354800547756731,
205
+ "loss": 2.1018,
206
+ "step": 145
207
+ },
208
+ {
209
+ "epoch": 2.14,
210
+ "learning_rate": 0.00013040646433810595,
211
+ "loss": 1.8963,
212
+ "step": 150
213
+ },
214
+ {
215
+ "epoch": 2.21,
216
+ "learning_rate": 0.00012223363969730686,
217
+ "loss": 2.1895,
218
+ "step": 155
219
+ },
220
+ {
221
+ "epoch": 2.29,
222
+ "learning_rate": 0.00011137140040750914,
223
+ "loss": 2.0564,
224
+ "step": 160
225
+ },
226
+ {
227
+ "epoch": 2.36,
228
+ "learning_rate": 9.836442450346452e-05,
229
+ "loss": 2.1397,
230
+ "step": 165
231
+ },
232
+ {
233
+ "epoch": 2.43,
234
+ "learning_rate": 8.386493606940326e-05,
235
+ "loss": 2.3483,
236
+ "step": 170
237
+ },
238
+ {
239
+ "epoch": 2.5,
240
+ "learning_rate": 6.860000000000001e-05,
241
+ "loss": 1.9349,
242
+ "step": 175
243
+ },
244
+ {
245
+ "epoch": 2.57,
246
+ "learning_rate": 5.333506393059685e-05,
247
+ "loss": 1.8439,
248
+ "step": 180
249
+ },
250
+ {
251
+ "epoch": 2.64,
252
+ "learning_rate": 3.8835575496535535e-05,
253
+ "loss": 1.9104,
254
+ "step": 185
255
+ },
256
+ {
257
+ "epoch": 2.71,
258
+ "learning_rate": 2.58285995924909e-05,
259
+ "loss": 1.7206,
260
+ "step": 190
261
+ },
262
+ {
263
+ "epoch": 2.79,
264
+ "learning_rate": 1.496636030269317e-05,
265
+ "loss": 2.2577,
266
+ "step": 195
267
+ },
268
+ {
269
+ "epoch": 2.86,
270
+ "learning_rate": 6.793535661894062e-06,
271
+ "loss": 1.8714,
272
+ "step": 200
273
+ },
274
+ {
275
+ "epoch": 2.93,
276
+ "learning_rate": 1.7199452243269073e-06,
277
+ "loss": 2.2299,
278
+ "step": 205
279
+ },
280
+ {
281
+ "epoch": 3.0,
282
+ "learning_rate": 0.0,
283
+ "loss": 1.8604,
284
+ "step": 210
285
+ },
286
+ {
287
+ "epoch": 3.0,
288
+ "eval_loss": 2.0318267345428467,
289
+ "eval_runtime": 4.5539,
290
+ "eval_samples_per_second": 21.081,
291
+ "eval_steps_per_second": 2.635,
292
+ "step": 210
293
  }
294
  ],
295
+ "max_steps": 210,
296
+ "num_train_epochs": 3,
297
+ "total_flos": 217264324608000.0,
298
  "trial_name": null,
299
  "trial_params": null
300
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c394c879763a7ac8c43760c1791f5a9aee9dc1f21c84e74abc5a6b80af9588a
3
  size 2671
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aada2de1d92f467f8f5669200ff659c45b17df03bf26159792e731f4a3fea9f2
3
  size 2671