AlekseyKorshuk committed on
Commit
18d31e0
1 Parent(s): 28a3ccf

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/andre-3000")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2aj9iybn/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on André 3000's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3lbg2cit) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3lbg2cit/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/andre-3000")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2hnhboqf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on André 3000's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1mydp6nh) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1mydp6nh/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "huggingartists/andre-3000",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -17,7 +17,9 @@
17
  "n_inner": null,
18
  "n_layer": 12,
19
  "n_positions": 1024,
 
20
  "resid_pdrop": 0.1,
 
21
  "scale_attn_weights": true,
22
  "summary_activation": null,
23
  "summary_first_dropout": 0.1,
@@ -34,7 +36,7 @@
34
  }
35
  },
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.11.3",
38
  "use_cache": true,
39
  "vocab_size": 50257
40
  }
1
  {
2
+ "_name_or_path": "andre-3000",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
17
  "n_inner": null,
18
  "n_layer": 12,
19
  "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
  "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
  "scale_attn_weights": true,
24
  "summary_activation": null,
25
  "summary_first_dropout": 0.1,
36
  }
37
  },
38
  "torch_dtype": "float32",
39
+ "transformers_version": "4.16.2",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.7857184410095215, "eval_runtime": 3.2483, "eval_samples_per_second": 43.715, "eval_steps_per_second": 5.541, "epoch": 105.0}
1
+ {"eval_loss": 2.1131954193115234, "eval_runtime": 1.8568, "eval_samples_per_second": 75.401, "eval_steps_per_second": 9.694, "epoch": 13.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ca2d40ae75a2d9347d2b27e73aece5aa55fffa7ae760d8a75b258044ec799c4
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:989d280b8f58c2ff5da2508491e0ae6778349dc1b34a169c32da297305ff01d6
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f6ec27a8db3a3d8e12f1403c8e1ddd7d9d8f7c369361110364668a7d3905bcc
3
  size 995604017
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:759bb1bfd5a6fd243d563dbd3b6c11508cb5d3cbc90e52a03119979a9d759d12
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:644cac333a83af4607d4d90810953f70e30bdc7159f4a9dc40f8b242c88c2770
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1590cd45efa4fe8174b9c88c70b68bbd7cf75b266511a97a2900be237518f868
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16e34fba951f86dbe52880106da1b406b7bf5d467c83625264b1d27faf3245c0
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b07ce20b7b785ae36b466b0be7ef39edcf348c4b3a68bf0463d5c30234dc25
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4215fb2fd58e7d9127b8dee8649641af0662fd1b11cda980f7e7dc26e6aa301f
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6450797a427e797c153fb64efaf17b83fd9c09303bffab938f0a1b399542842
3
  size 623
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.7857184410095215,
3
- "best_model_checkpoint": "output/andre-3000/checkpoint-990",
4
- "epoch": 10.0,
5
- "global_step": 990,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1274,11 +1274,133 @@
1274
  "eval_samples_per_second": 43.32,
1275
  "eval_steps_per_second": 5.491,
1276
  "step": 990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1277
  }
1278
  ],
1279
- "max_steps": 10395,
1280
- "num_train_epochs": 105,
1281
- "total_flos": 1032103526400000.0,
1282
  "trial_name": null,
1283
  "trial_params": null
1284
  }
1
  {
2
+ "best_metric": 2.1131954193115234,
3
+ "best_model_checkpoint": "output/andre-3000/checkpoint-1089",
4
+ "epoch": 11.0,
5
+ "global_step": 1089,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
1274
  "eval_samples_per_second": 43.32,
1275
  "eval_steps_per_second": 5.491,
1276
  "step": 990
1277
+ },
1278
+ {
1279
+ "epoch": 10.05,
1280
+ "learning_rate": 0.00013633830776320067,
1281
+ "loss": 2.2465,
1282
+ "step": 995
1283
+ },
1284
+ {
1285
+ "epoch": 10.1,
1286
+ "learning_rate": 0.00013377487867702895,
1287
+ "loss": 2.0385,
1288
+ "step": 1000
1289
+ },
1290
+ {
1291
+ "epoch": 10.15,
1292
+ "learning_rate": 0.0001295741117777277,
1293
+ "loss": 2.0847,
1294
+ "step": 1005
1295
+ },
1296
+ {
1297
+ "epoch": 10.2,
1298
+ "learning_rate": 0.00012384153966663063,
1299
+ "loss": 2.0744,
1300
+ "step": 1010
1301
+ },
1302
+ {
1303
+ "epoch": 10.25,
1304
+ "learning_rate": 0.00011672117729665371,
1305
+ "loss": 2.2347,
1306
+ "step": 1015
1307
+ },
1308
+ {
1309
+ "epoch": 10.3,
1310
+ "learning_rate": 0.00010839190399658435,
1311
+ "loss": 2.3577,
1312
+ "step": 1020
1313
+ },
1314
+ {
1315
+ "epoch": 10.35,
1316
+ "learning_rate": 9.906296962475639e-05,
1317
+ "loss": 2.1928,
1318
+ "step": 1025
1319
+ },
1320
+ {
1321
+ "epoch": 10.4,
1322
+ "learning_rate": 8.896873774751959e-05,
1323
+ "loss": 2.3981,
1324
+ "step": 1030
1325
+ },
1326
+ {
1327
+ "epoch": 10.45,
1328
+ "learning_rate": 7.836279790554739e-05,
1329
+ "loss": 2.1466,
1330
+ "step": 1035
1331
+ },
1332
+ {
1333
+ "epoch": 10.51,
1334
+ "learning_rate": 6.75115948809323e-05,
1335
+ "loss": 2.0252,
1336
+ "step": 1040
1337
+ },
1338
+ {
1339
+ "epoch": 10.56,
1340
+ "learning_rate": 5.66877350120488e-05,
1341
+ "loss": 2.1059,
1342
+ "step": 1045
1343
+ },
1344
+ {
1345
+ "epoch": 10.61,
1346
+ "learning_rate": 4.616313771642518e-05,
1347
+ "loss": 2.251,
1348
+ "step": 1050
1349
+ },
1350
+ {
1351
+ "epoch": 10.66,
1352
+ "learning_rate": 3.620220427059434e-05,
1353
+ "loss": 2.332,
1354
+ "step": 1055
1355
+ },
1356
+ {
1357
+ "epoch": 10.71,
1358
+ "learning_rate": 2.7055175462356102e-05,
1359
+ "loss": 2.0934,
1360
+ "step": 1060
1361
+ },
1362
+ {
1363
+ "epoch": 10.76,
1364
+ "learning_rate": 1.8951844985992284e-05,
1365
+ "loss": 2.2116,
1366
+ "step": 1065
1367
+ },
1368
+ {
1369
+ "epoch": 10.81,
1370
+ "learning_rate": 1.2095786513913607e-05,
1371
+ "loss": 2.1227,
1372
+ "step": 1070
1373
+ },
1374
+ {
1375
+ "epoch": 10.86,
1376
+ "learning_rate": 6.659239473537932e-06,
1377
+ "loss": 2.1152,
1378
+ "step": 1075
1379
+ },
1380
+ {
1381
+ "epoch": 10.91,
1382
+ "learning_rate": 2.7787820100456023e-06,
1383
+ "loss": 1.9454,
1384
+ "step": 1080
1385
+ },
1386
+ {
1387
+ "epoch": 10.96,
1388
+ "learning_rate": 5.518998398074473e-07,
1389
+ "loss": 2.208,
1390
+ "step": 1085
1391
+ },
1392
+ {
1393
+ "epoch": 11.0,
1394
+ "eval_loss": 2.1131954193115234,
1395
+ "eval_runtime": 1.8527,
1396
+ "eval_samples_per_second": 75.565,
1397
+ "eval_steps_per_second": 9.716,
1398
+ "step": 1089
1399
  }
1400
  ],
1401
+ "max_steps": 1287,
1402
+ "num_train_epochs": 13,
1403
+ "total_flos": 1135575171072000.0,
1404
  "trial_name": null,
1405
  "trial_params": null
1406
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87b339441b1f6019e8540c7c71ded5ff035d2f0e1f074c763f744b0da4bc0c37
3
- size 2863
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eaf90804f86898f984a6fa365311f924317fbe67459b0c35ed5b4fc325b6372
3
+ size 3055