AlekseyKorshuk committed on
Commit
c650132
1 Parent(s): 44865d9

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/taylor-swift")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11hietbj/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Taylor Swift's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/jwz5zda0) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/jwz5zda0/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/taylor-swift")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2l84tzp2/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Taylor Swift's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1hy7aa65) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1hy7aa65/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -36,7 +36,7 @@
36
  }
37
  },
38
  "torch_dtype": "float32",
39
- "transformers_version": "4.16.2",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
36
  }
37
  },
38
  "torch_dtype": "float32",
39
+ "transformers_version": "4.20.1",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.4377235174179077, "eval_runtime": 9.5718, "eval_samples_per_second": 20.895, "eval_steps_per_second": 2.612, "epoch": 7.0}
1
+ {"eval_loss": 1.3240652084350586, "eval_runtime": 5.3381, "eval_samples_per_second": 42.712, "eval_steps_per_second": 5.433, "epoch": 12.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76ae138578ec229ab199e35e58682cc0818297990880e98b41bb7caca21799bb
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0193dabaedc886d2423381b1d4991d098c388ec01408d24cb8525d1c5031eb2
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd96b2ecf6961b37a51c11488af2110f05d27248a9d735d9cec37797c7c45cbe
3
  size 995604017
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc77d09c4c454b54d1ca6720bc643fbbdc7dae8b9f46f9dfbe190c61e89c49b
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a05cf58e9ce70f6bc6054d004220f5b59634b254e9d1c7c20c77dd1d160dacdc
3
- size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e35f64d133e2da786674123c81f13fa6ce6316d7e8029fc026e6c67f01baf8b
3
+ size 510396521
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0917e5420fc90d80d96ba9647582ee18ea3c8fd35cfb2e8cff174e79d4c678c0
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:664be080bd3dc652f628b2caf10ec5043081add1f81d7f2e4ab19f1da66a9fd6
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:696255b08bfcc0a618196cb4d29aaed44996ae47e5ca36d14f60bf60ece9f170
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b79408328cb94b5f58b90ffdd8b7127d5992674b5e2cf7df0005f8718053694
3
  size 623
special_tokens_map.json CHANGED
@@ -1 +1,5 @@
1
- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tokenizer.json CHANGED
@@ -5,29 +5,32 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
- "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
- "normalized": false
 
14
  }
15
  ],
16
  "normalizer": null,
17
  "pre_tokenizer": {
18
  "type": "ByteLevel",
19
  "add_prefix_space": false,
20
- "trim_offsets": true
 
21
  },
22
  "post_processor": {
23
  "type": "ByteLevel",
24
  "add_prefix_space": true,
25
- "trim_offsets": false
 
26
  },
27
  "decoder": {
28
  "type": "ByteLevel",
29
  "add_prefix_space": true,
30
- "trim_offsets": true
 
31
  },
32
  "model": {
33
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
  }
15
  ],
16
  "normalizer": null,
17
  "pre_tokenizer": {
18
  "type": "ByteLevel",
19
  "add_prefix_space": false,
20
+ "trim_offsets": true,
21
+ "use_regex": true
22
  },
23
  "post_processor": {
24
  "type": "ByteLevel",
25
  "add_prefix_space": true,
26
+ "trim_offsets": false,
27
+ "use_regex": true
28
  },
29
  "decoder": {
30
  "type": "ByteLevel",
31
  "add_prefix_space": true,
32
+ "trim_offsets": true,
33
+ "use_regex": true
34
  },
35
  "model": {
36
  "type": "BPE",
tokenizer_config.json CHANGED
@@ -1 +1,10 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/taylor-swift", "tokenizer_class": "GPT2Tokenizer"}
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "model_max_length": 1024,
6
+ "name_or_path": "huggingartists/taylor-swift",
7
+ "special_tokens_map_file": null,
8
+ "tokenizer_class": "GPT2Tokenizer",
9
+ "unk_token": "<|endoftext|>"
10
+ }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.4377235174179077,
3
- "best_model_checkpoint": "output/taylor-swift/checkpoint-1120",
4
- "epoch": 7.0,
5
- "global_step": 1120,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1414,11 +1414,375 @@
1414
  "eval_samples_per_second": 21.045,
1415
  "eval_steps_per_second": 2.631,
1416
  "step": 1120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1417
  }
1418
  ],
1419
- "max_steps": 1120,
1420
- "num_train_epochs": 7,
1421
- "total_flos": 1168759259136000.0,
1422
  "trial_name": null,
1423
  "trial_params": null
1424
  }
1
  {
2
+ "best_metric": 1.3240652084350586,
3
+ "best_model_checkpoint": "output/taylor-swift/checkpoint-1413",
4
+ "epoch": 9.0,
5
+ "global_step": 1413,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
1414
  "eval_samples_per_second": 21.045,
1415
  "eval_steps_per_second": 2.631,
1416
  "step": 1120
1417
+ },
1418
+ {
1419
+ "epoch": 7.17,
1420
+ "learning_rate": 9.076596574074994e-06,
1421
+ "loss": 1.4269,
1422
+ "step": 1125
1423
+ },
1424
+ {
1425
+ "epoch": 7.2,
1426
+ "learning_rate": 1.2780475216607764e-05,
1427
+ "loss": 1.559,
1428
+ "step": 1130
1429
+ },
1430
+ {
1431
+ "epoch": 7.23,
1432
+ "learning_rate": 1.7042649548372873e-05,
1433
+ "loss": 1.529,
1434
+ "step": 1135
1435
+ },
1436
+ {
1437
+ "epoch": 7.26,
1438
+ "learning_rate": 2.182049015671568e-05,
1439
+ "loss": 1.6687,
1440
+ "step": 1140
1441
+ },
1442
+ {
1443
+ "epoch": 7.29,
1444
+ "learning_rate": 2.7066210038327817e-05,
1445
+ "loss": 1.5638,
1446
+ "step": 1145
1447
+ },
1448
+ {
1449
+ "epoch": 7.32,
1450
+ "learning_rate": 3.2727342555268683e-05,
1451
+ "loss": 1.5554,
1452
+ "step": 1150
1453
+ },
1454
+ {
1455
+ "epoch": 7.36,
1456
+ "learning_rate": 3.874726619575346e-05,
1457
+ "loss": 1.317,
1458
+ "step": 1155
1459
+ },
1460
+ {
1461
+ "epoch": 7.39,
1462
+ "learning_rate": 4.5065770891153554e-05,
1463
+ "loss": 1.679,
1464
+ "step": 1160
1465
+ },
1466
+ {
1467
+ "epoch": 7.42,
1468
+ "learning_rate": 5.161966022502662e-05,
1469
+ "loss": 1.6683,
1470
+ "step": 1165
1471
+ },
1472
+ {
1473
+ "epoch": 7.45,
1474
+ "learning_rate": 5.834338351099537e-05,
1475
+ "loss": 1.4034,
1476
+ "step": 1170
1477
+ },
1478
+ {
1479
+ "epoch": 7.48,
1480
+ "learning_rate": 6.516969141756308e-05,
1481
+ "loss": 1.6013,
1482
+ "step": 1175
1483
+ },
1484
+ {
1485
+ "epoch": 7.52,
1486
+ "learning_rate": 7.20303085824368e-05,
1487
+ "loss": 1.5516,
1488
+ "step": 1180
1489
+ },
1490
+ {
1491
+ "epoch": 7.55,
1492
+ "learning_rate": 7.885661648900452e-05,
1493
+ "loss": 1.5248,
1494
+ "step": 1185
1495
+ },
1496
+ {
1497
+ "epoch": 7.58,
1498
+ "learning_rate": 8.558033977497326e-05,
1499
+ "loss": 1.3379,
1500
+ "step": 1190
1501
+ },
1502
+ {
1503
+ "epoch": 7.61,
1504
+ "learning_rate": 9.213422910884634e-05,
1505
+ "loss": 1.3994,
1506
+ "step": 1195
1507
+ },
1508
+ {
1509
+ "epoch": 7.64,
1510
+ "learning_rate": 9.845273380424641e-05,
1511
+ "loss": 1.4434,
1512
+ "step": 1200
1513
+ },
1514
+ {
1515
+ "epoch": 7.68,
1516
+ "learning_rate": 0.00010447265744473122,
1517
+ "loss": 1.6187,
1518
+ "step": 1205
1519
+ },
1520
+ {
1521
+ "epoch": 7.71,
1522
+ "learning_rate": 0.00011013378996167208,
1523
+ "loss": 1.5693,
1524
+ "step": 1210
1525
+ },
1526
+ {
1527
+ "epoch": 7.74,
1528
+ "learning_rate": 0.00011537950984328424,
1529
+ "loss": 1.4006,
1530
+ "step": 1215
1531
+ },
1532
+ {
1533
+ "epoch": 7.77,
1534
+ "learning_rate": 0.00012015735045162704,
1535
+ "loss": 1.7153,
1536
+ "step": 1220
1537
+ },
1538
+ {
1539
+ "epoch": 7.8,
1540
+ "learning_rate": 0.0001244195247833922,
1541
+ "loss": 1.5277,
1542
+ "step": 1225
1543
+ },
1544
+ {
1545
+ "epoch": 7.83,
1546
+ "learning_rate": 0.00012812340342592494,
1547
+ "loss": 1.5052,
1548
+ "step": 1230
1549
+ },
1550
+ {
1551
+ "epoch": 7.87,
1552
+ "learning_rate": 0.0001312319409280581,
1553
+ "loss": 1.548,
1554
+ "step": 1235
1555
+ },
1556
+ {
1557
+ "epoch": 7.9,
1558
+ "learning_rate": 0.00013371404632128166,
1559
+ "loss": 1.4896,
1560
+ "step": 1240
1561
+ },
1562
+ {
1563
+ "epoch": 7.93,
1564
+ "learning_rate": 0.0001355448940853745,
1565
+ "loss": 1.5506,
1566
+ "step": 1245
1567
+ },
1568
+ {
1569
+ "epoch": 7.96,
1570
+ "learning_rate": 0.00013670617244827653,
1571
+ "loss": 1.6902,
1572
+ "step": 1250
1573
+ },
1574
+ {
1575
+ "epoch": 7.99,
1576
+ "learning_rate": 0.0001371862665367597,
1577
+ "loss": 1.5112,
1578
+ "step": 1255
1579
+ },
1580
+ {
1581
+ "epoch": 8.0,
1582
+ "eval_loss": 1.352002739906311,
1583
+ "eval_runtime": 5.5761,
1584
+ "eval_samples_per_second": 40.889,
1585
+ "eval_steps_per_second": 5.201,
1586
+ "step": 1256
1587
+ },
1588
+ {
1589
+ "epoch": 8.03,
1590
+ "learning_rate": 0.00013698037454606005,
1591
+ "loss": 1.3097,
1592
+ "step": 1260
1593
+ },
1594
+ {
1595
+ "epoch": 8.06,
1596
+ "learning_rate": 0.0001360905557665658,
1597
+ "loss": 1.5119,
1598
+ "step": 1265
1599
+ },
1600
+ {
1601
+ "epoch": 8.09,
1602
+ "learning_rate": 0.00013452570998720767,
1603
+ "loss": 1.2738,
1604
+ "step": 1270
1605
+ },
1606
+ {
1607
+ "epoch": 8.12,
1608
+ "learning_rate": 0.00013230148848155559,
1609
+ "loss": 1.2764,
1610
+ "step": 1275
1611
+ },
1612
+ {
1613
+ "epoch": 8.15,
1614
+ "learning_rate": 0.00012944013746692,
1615
+ "loss": 1.2347,
1616
+ "step": 1280
1617
+ },
1618
+ {
1619
+ "epoch": 8.18,
1620
+ "learning_rate": 0.00012597027560214946,
1621
+ "loss": 1.4534,
1622
+ "step": 1285
1623
+ },
1624
+ {
1625
+ "epoch": 8.22,
1626
+ "learning_rate": 0.00012192660774954517,
1627
+ "loss": 1.4193,
1628
+ "step": 1290
1629
+ },
1630
+ {
1631
+ "epoch": 8.25,
1632
+ "learning_rate": 0.00011734957786379066,
1633
+ "loss": 1.5185,
1634
+ "step": 1295
1635
+ },
1636
+ {
1637
+ "epoch": 8.28,
1638
+ "learning_rate": 0.00011228496447963,
1639
+ "loss": 1.5225,
1640
+ "step": 1300
1641
+ },
1642
+ {
1643
+ "epoch": 8.31,
1644
+ "learning_rate": 0.0001067834228441478,
1645
+ "loss": 1.8013,
1646
+ "step": 1305
1647
+ },
1648
+ {
1649
+ "epoch": 8.34,
1650
+ "learning_rate": 0.00010089997827314661,
1651
+ "loss": 1.4035,
1652
+ "step": 1310
1653
+ },
1654
+ {
1655
+ "epoch": 8.38,
1656
+ "learning_rate": 9.469347579898059e-05,
1657
+ "loss": 1.4431,
1658
+ "step": 1315
1659
+ },
1660
+ {
1661
+ "epoch": 8.41,
1662
+ "learning_rate": 8.82259916143434e-05,
1663
+ "loss": 1.3074,
1664
+ "step": 1320
1665
+ },
1666
+ {
1667
+ "epoch": 8.44,
1668
+ "learning_rate": 8.1562212198643e-05,
1669
+ "loss": 1.3587,
1670
+ "step": 1325
1671
+ },
1672
+ {
1673
+ "epoch": 8.47,
1674
+ "learning_rate": 7.476878733681043e-05,
1675
+ "loss": 1.5297,
1676
+ "step": 1330
1677
+ },
1678
+ {
1679
+ "epoch": 8.5,
1680
+ "learning_rate": 6.791366350152217e-05,
1681
+ "loss": 1.2815,
1682
+ "step": 1335
1683
+ },
1684
+ {
1685
+ "epoch": 8.54,
1686
+ "learning_rate": 6.106540426620946e-05,
1687
+ "loss": 1.5607,
1688
+ "step": 1340
1689
+ },
1690
+ {
1691
+ "epoch": 8.57,
1692
+ "learning_rate": 5.4292504545952105e-05,
1693
+ "loss": 1.1765,
1694
+ "step": 1345
1695
+ },
1696
+ {
1697
+ "epoch": 8.6,
1698
+ "learning_rate": 4.766270552507704e-05,
1699
+ "loss": 1.5363,
1700
+ "step": 1350
1701
+ },
1702
+ {
1703
+ "epoch": 8.63,
1704
+ "learning_rate": 4.124231712342338e-05,
1705
+ "loss": 1.4107,
1706
+ "step": 1355
1707
+ },
1708
+ {
1709
+ "epoch": 8.66,
1710
+ "learning_rate": 3.509555477782507e-05,
1711
+ "loss": 1.4608,
1712
+ "step": 1360
1713
+ },
1714
+ {
1715
+ "epoch": 8.69,
1716
+ "learning_rate": 2.928389717219465e-05,
1717
+ "loss": 1.3783,
1718
+ "step": 1365
1719
+ },
1720
+ {
1721
+ "epoch": 8.73,
1722
+ "learning_rate": 2.386547134005838e-05,
1723
+ "loss": 1.2348,
1724
+ "step": 1370
1725
+ },
1726
+ {
1727
+ "epoch": 8.76,
1728
+ "learning_rate": 1.889447128962836e-05,
1729
+ "loss": 1.5068,
1730
+ "step": 1375
1731
+ },
1732
+ {
1733
+ "epoch": 8.79,
1734
+ "learning_rate": 1.4420615966203568e-05,
1735
+ "loss": 1.3345,
1736
+ "step": 1380
1737
+ },
1738
+ {
1739
+ "epoch": 8.82,
1740
+ "learning_rate": 1.0488651973253671e-05,
1741
+ "loss": 1.2429,
1742
+ "step": 1385
1743
+ },
1744
+ {
1745
+ "epoch": 8.85,
1746
+ "learning_rate": 7.13790602586542e-06,
1747
+ "loss": 1.3129,
1748
+ "step": 1390
1749
+ },
1750
+ {
1751
+ "epoch": 8.89,
1752
+ "learning_rate": 4.401891612830206e-06,
1753
+ "loss": 1.2724,
1754
+ "step": 1395
1755
+ },
1756
+ {
1757
+ "epoch": 8.92,
1758
+ "learning_rate": 2.307973801450113e-06,
1759
+ "loss": 1.3608,
1760
+ "step": 1400
1761
+ },
1762
+ {
1763
+ "epoch": 8.95,
1764
+ "learning_rate": 8.770955376250992e-07,
1765
+ "loss": 1.4328,
1766
+ "step": 1405
1767
+ },
1768
+ {
1769
+ "epoch": 8.98,
1770
+ "learning_rate": 1.2356817870921626e-07,
1771
+ "loss": 1.3714,
1772
+ "step": 1410
1773
+ },
1774
+ {
1775
+ "epoch": 9.0,
1776
+ "eval_loss": 1.3240652084350586,
1777
+ "eval_runtime": 5.3842,
1778
+ "eval_samples_per_second": 42.346,
1779
+ "eval_steps_per_second": 5.386,
1780
+ "step": 1413
1781
  }
1782
  ],
1783
+ "max_steps": 1884,
1784
+ "num_train_epochs": 12,
1785
+ "total_flos": 1473164476416000.0,
1786
  "trial_name": null,
1787
  "trial_params": null
1788
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ca623b70d31f90284e0c9f8a31f7cb178f04870d48cd416af0fe84dfe57c1e5
3
- size 3055
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c178f13ee8a4a4e92389f456bb271b331e24a800185642959a69b718d48e7169
3
+ size 3311