AlekseyKorshuk committed on
Commit
4cdd93d
1 Parent(s): 92ed093

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/m9zgb2vd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/ys11x7xp) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/ys11x7xp/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/p956kstb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1st5hxmj) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1st5hxmj/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -35,7 +35,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
35
  }
36
  },
37
  "torch_dtype": "float32",
38
+ "transformers_version": "4.10.1",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.1428929567337036, "eval_runtime": 7.9754, "eval_samples_per_second": 21.441, "eval_steps_per_second": 2.758, "epoch": 12.0}
 
1
+ {"eval_loss": 1.0746197700500488, "eval_runtime": 7.0746, "eval_samples_per_second": 22.051, "eval_steps_per_second": 2.827, "epoch": 13.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:355b1113b602cec6460d0f66c7318d9309a2d51638c83d166b18454dad89bd5c
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c647ce86d1b1a86724518a6df72da43aedb998658e398e70f58daf9a2243c2d2
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b59045dfc4ed4d02f26069b5cc05179d371e5fa764cb055d1a830e8c0382acd4
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0215949aa5275f39e7df7af91225d000e2c9646cf575dd56a76789468682a08
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dc5e091ec6cb7f165ce89c1bc5ddd005d9d36c5598145f3b40df6a6b9c583cb
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b502aba936ef42281e40d36ce74875ad930f76b429614a069ad2a0cf5d62940e
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c63f55fe8fef4a486cd65797398129fb611c4447a861094695f459a188101b5d
3
- size 14631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f7ad53ae8cbda635558cf1458df6c46c041d6541dd1606dd52a4492e1dfbd5
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e825854d4daee9b3baa1369de3bcfdc0baa5bcd259634286f62b5236f6992a0f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbd7e7d8b60f58c4a16b003557100be8552014654bb06fb1afc69d6829aafac8
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.1428929567337036,
3
- "best_model_checkpoint": "output/morgenshtern/checkpoint-1140",
4
- "epoch": 12.0,
5
- "global_step": 1140,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1494,11 +1494,171 @@
1494
  "eval_samples_per_second": 21.48,
1495
  "eval_steps_per_second": 2.764,
1496
  "step": 1140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1497
  }
1498
  ],
1499
- "max_steps": 1140,
1500
- "num_train_epochs": 12,
1501
- "total_flos": 1182346444800000.0,
1502
  "trial_name": null,
1503
  "trial_params": null
1504
  }
 
1
  {
2
+ "best_metric": 1.0746197700500488,
3
+ "best_model_checkpoint": "output/morgenshtern/checkpoint-1261",
4
+ "epoch": 13.0,
5
+ "global_step": 1261,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1494
  "eval_samples_per_second": 21.48,
1495
  "eval_steps_per_second": 2.764,
1496
  "step": 1140
1497
+ },
1498
+ {
1499
+ "epoch": 11.8,
1500
+ "learning_rate": 0.00012461626728572456,
1501
+ "loss": 1.2708,
1502
+ "step": 1145
1503
+ },
1504
+ {
1505
+ "epoch": 11.86,
1506
+ "learning_rate": 0.000130268089438458,
1507
+ "loss": 1.2385,
1508
+ "step": 1150
1509
+ },
1510
+ {
1511
+ "epoch": 11.91,
1512
+ "learning_rate": 0.00013430626843929596,
1513
+ "loss": 1.2608,
1514
+ "step": 1155
1515
+ },
1516
+ {
1517
+ "epoch": 11.96,
1518
+ "learning_rate": 0.00013662513894413278,
1519
+ "loss": 1.2132,
1520
+ "step": 1160
1521
+ },
1522
+ {
1523
+ "epoch": 12.0,
1524
+ "eval_loss": 1.1082079410552979,
1525
+ "eval_runtime": 6.721,
1526
+ "eval_samples_per_second": 23.211,
1527
+ "eval_steps_per_second": 2.976,
1528
+ "step": 1164
1529
+ },
1530
+ {
1531
+ "epoch": 12.01,
1532
+ "learning_rate": 0.00013716402403652231,
1533
+ "loss": 1.2701,
1534
+ "step": 1165
1535
+ },
1536
+ {
1537
+ "epoch": 12.06,
1538
+ "learning_rate": 0.0001359088229352192,
1539
+ "loss": 1.2274,
1540
+ "step": 1170
1541
+ },
1542
+ {
1543
+ "epoch": 12.11,
1544
+ "learning_rate": 0.0001328923799634352,
1545
+ "loss": 1.1978,
1546
+ "step": 1175
1547
+ },
1548
+ {
1549
+ "epoch": 12.16,
1550
+ "learning_rate": 0.0001281936251251452,
1551
+ "loss": 1.0879,
1552
+ "step": 1180
1553
+ },
1554
+ {
1555
+ "epoch": 12.22,
1556
+ "learning_rate": 0.00012193550877662404,
1557
+ "loss": 1.2841,
1558
+ "step": 1185
1559
+ },
1560
+ {
1561
+ "epoch": 12.27,
1562
+ "learning_rate": 0.00011428178443580113,
1563
+ "loss": 1.166,
1564
+ "step": 1190
1565
+ },
1566
+ {
1567
+ "epoch": 12.32,
1568
+ "learning_rate": 0.0001054327239123201,
1569
+ "loss": 1.2385,
1570
+ "step": 1195
1571
+ },
1572
+ {
1573
+ "epoch": 12.37,
1574
+ "learning_rate": 9.561987687870095e-05,
1575
+ "loss": 1.1758,
1576
+ "step": 1200
1577
+ },
1578
+ {
1579
+ "epoch": 12.42,
1580
+ "learning_rate": 8.51000120067249e-05,
1581
+ "loss": 1.1698,
1582
+ "step": 1205
1583
+ },
1584
+ {
1585
+ "epoch": 12.47,
1586
+ "learning_rate": 7.414839820879227e-05,
1587
+ "loss": 1.3722,
1588
+ "step": 1210
1589
+ },
1590
+ {
1591
+ "epoch": 12.53,
1592
+ "learning_rate": 6.305160179120769e-05,
1593
+ "loss": 1.1787,
1594
+ "step": 1215
1595
+ },
1596
+ {
1597
+ "epoch": 12.58,
1598
+ "learning_rate": 5.209998799327507e-05,
1599
+ "loss": 1.2814,
1600
+ "step": 1220
1601
+ },
1602
+ {
1603
+ "epoch": 12.63,
1604
+ "learning_rate": 4.158012312129902e-05,
1605
+ "loss": 1.2154,
1606
+ "step": 1225
1607
+ },
1608
+ {
1609
+ "epoch": 12.68,
1610
+ "learning_rate": 3.176727608767987e-05,
1611
+ "loss": 1.2798,
1612
+ "step": 1230
1613
+ },
1614
+ {
1615
+ "epoch": 12.73,
1616
+ "learning_rate": 2.291821556419886e-05,
1617
+ "loss": 1.2241,
1618
+ "step": 1235
1619
+ },
1620
+ {
1621
+ "epoch": 12.78,
1622
+ "learning_rate": 1.5264491223375942e-05,
1623
+ "loss": 1.1194,
1624
+ "step": 1240
1625
+ },
1626
+ {
1627
+ "epoch": 12.84,
1628
+ "learning_rate": 9.006374874854777e-06,
1629
+ "loss": 1.1957,
1630
+ "step": 1245
1631
+ },
1632
+ {
1633
+ "epoch": 12.89,
1634
+ "learning_rate": 4.3076200365648044e-06,
1635
+ "loss": 1.1553,
1636
+ "step": 1250
1637
+ },
1638
+ {
1639
+ "epoch": 12.94,
1640
+ "learning_rate": 1.2911770647808012e-06,
1641
+ "loss": 1.1644,
1642
+ "step": 1255
1643
+ },
1644
+ {
1645
+ "epoch": 12.99,
1646
+ "learning_rate": 3.597596347767558e-08,
1647
+ "loss": 1.1646,
1648
+ "step": 1260
1649
+ },
1650
+ {
1651
+ "epoch": 13.0,
1652
+ "eval_loss": 1.0746197700500488,
1653
+ "eval_runtime": 7.0141,
1654
+ "eval_samples_per_second": 22.241,
1655
+ "eval_steps_per_second": 2.851,
1656
+ "step": 1261
1657
  }
1658
  ],
1659
+ "max_steps": 1261,
1660
+ "num_train_epochs": 13,
1661
+ "total_flos": 1307244036096000.0,
1662
  "trial_name": null,
1663
  "trial_params": null
1664
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54fa34a299d69d7a3304670c0272e7c9b90a901c33b2a0c579d8b695f9d47857
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:869572ff71fc1f7eacca418acad1f3acfe2cf9117bc54dd7f9ecad9664ccf36f
3
  size 2671