AlekseyKorshuk
committed on
Commit
•
4cdd93d
1
Parent(s):
92ed093
huggingartists
Browse files
- README.md +3 -3
- config.json +1 -1
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +167 -7
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/morgenshtern")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/morgenshtern")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/p956kstb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1st5hxmj) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1st5hxmj/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
config.json
CHANGED
@@ -35,7 +35,7 @@
|
|
35 |
}
|
36 |
},
|
37 |
"torch_dtype": "float32",
|
38 |
-
"transformers_version": "4.10.
|
39 |
"use_cache": true,
|
40 |
"vocab_size": 50257
|
41 |
}
|
|
|
35 |
}
|
36 |
},
|
37 |
"torch_dtype": "float32",
|
38 |
+
"transformers_version": "4.10.1",
|
39 |
"use_cache": true,
|
40 |
"vocab_size": 50257
|
41 |
}
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 1.
|
|
|
1 |
+
{"eval_loss": 1.0746197700500488, "eval_runtime": 7.0746, "eval_samples_per_second": 22.051, "eval_steps_per_second": 2.827, "epoch": 13.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c647ce86d1b1a86724518a6df72da43aedb998658e398e70f58daf9a2243c2d2
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0215949aa5275f39e7df7af91225d000e2c9646cf575dd56a76789468682a08
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510403817
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b502aba936ef42281e40d36ce74875ad930f76b429614a069ad2a0cf5d62940e
|
3 |
size 510403817
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64f7ad53ae8cbda635558cf1458df6c46c041d6541dd1606dd52a4492e1dfbd5
|
3 |
+
size 14503
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbd7e7d8b60f58c4a16b003557100be8552014654bb06fb1afc69d6829aafac8
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/morgenshtern/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1494,11 +1494,171 @@
|
|
1494 |
"eval_samples_per_second": 21.48,
|
1495 |
"eval_steps_per_second": 2.764,
|
1496 |
"step": 1140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1497 |
}
|
1498 |
],
|
1499 |
-
"max_steps":
|
1500 |
-
"num_train_epochs":
|
1501 |
-
"total_flos":
|
1502 |
"trial_name": null,
|
1503 |
"trial_params": null
|
1504 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.0746197700500488,
|
3 |
+
"best_model_checkpoint": "output/morgenshtern/checkpoint-1261",
|
4 |
+
"epoch": 13.0,
|
5 |
+
"global_step": 1261,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1494 |
"eval_samples_per_second": 21.48,
|
1495 |
"eval_steps_per_second": 2.764,
|
1496 |
"step": 1140
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 11.8,
|
1500 |
+
"learning_rate": 0.00012461626728572456,
|
1501 |
+
"loss": 1.2708,
|
1502 |
+
"step": 1145
|
1503 |
+
},
|
1504 |
+
{
|
1505 |
+
"epoch": 11.86,
|
1506 |
+
"learning_rate": 0.000130268089438458,
|
1507 |
+
"loss": 1.2385,
|
1508 |
+
"step": 1150
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 11.91,
|
1512 |
+
"learning_rate": 0.00013430626843929596,
|
1513 |
+
"loss": 1.2608,
|
1514 |
+
"step": 1155
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 11.96,
|
1518 |
+
"learning_rate": 0.00013662513894413278,
|
1519 |
+
"loss": 1.2132,
|
1520 |
+
"step": 1160
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 12.0,
|
1524 |
+
"eval_loss": 1.1082079410552979,
|
1525 |
+
"eval_runtime": 6.721,
|
1526 |
+
"eval_samples_per_second": 23.211,
|
1527 |
+
"eval_steps_per_second": 2.976,
|
1528 |
+
"step": 1164
|
1529 |
+
},
|
1530 |
+
{
|
1531 |
+
"epoch": 12.01,
|
1532 |
+
"learning_rate": 0.00013716402403652231,
|
1533 |
+
"loss": 1.2701,
|
1534 |
+
"step": 1165
|
1535 |
+
},
|
1536 |
+
{
|
1537 |
+
"epoch": 12.06,
|
1538 |
+
"learning_rate": 0.0001359088229352192,
|
1539 |
+
"loss": 1.2274,
|
1540 |
+
"step": 1170
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"epoch": 12.11,
|
1544 |
+
"learning_rate": 0.0001328923799634352,
|
1545 |
+
"loss": 1.1978,
|
1546 |
+
"step": 1175
|
1547 |
+
},
|
1548 |
+
{
|
1549 |
+
"epoch": 12.16,
|
1550 |
+
"learning_rate": 0.0001281936251251452,
|
1551 |
+
"loss": 1.0879,
|
1552 |
+
"step": 1180
|
1553 |
+
},
|
1554 |
+
{
|
1555 |
+
"epoch": 12.22,
|
1556 |
+
"learning_rate": 0.00012193550877662404,
|
1557 |
+
"loss": 1.2841,
|
1558 |
+
"step": 1185
|
1559 |
+
},
|
1560 |
+
{
|
1561 |
+
"epoch": 12.27,
|
1562 |
+
"learning_rate": 0.00011428178443580113,
|
1563 |
+
"loss": 1.166,
|
1564 |
+
"step": 1190
|
1565 |
+
},
|
1566 |
+
{
|
1567 |
+
"epoch": 12.32,
|
1568 |
+
"learning_rate": 0.0001054327239123201,
|
1569 |
+
"loss": 1.2385,
|
1570 |
+
"step": 1195
|
1571 |
+
},
|
1572 |
+
{
|
1573 |
+
"epoch": 12.37,
|
1574 |
+
"learning_rate": 9.561987687870095e-05,
|
1575 |
+
"loss": 1.1758,
|
1576 |
+
"step": 1200
|
1577 |
+
},
|
1578 |
+
{
|
1579 |
+
"epoch": 12.42,
|
1580 |
+
"learning_rate": 8.51000120067249e-05,
|
1581 |
+
"loss": 1.1698,
|
1582 |
+
"step": 1205
|
1583 |
+
},
|
1584 |
+
{
|
1585 |
+
"epoch": 12.47,
|
1586 |
+
"learning_rate": 7.414839820879227e-05,
|
1587 |
+
"loss": 1.3722,
|
1588 |
+
"step": 1210
|
1589 |
+
},
|
1590 |
+
{
|
1591 |
+
"epoch": 12.53,
|
1592 |
+
"learning_rate": 6.305160179120769e-05,
|
1593 |
+
"loss": 1.1787,
|
1594 |
+
"step": 1215
|
1595 |
+
},
|
1596 |
+
{
|
1597 |
+
"epoch": 12.58,
|
1598 |
+
"learning_rate": 5.209998799327507e-05,
|
1599 |
+
"loss": 1.2814,
|
1600 |
+
"step": 1220
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"epoch": 12.63,
|
1604 |
+
"learning_rate": 4.158012312129902e-05,
|
1605 |
+
"loss": 1.2154,
|
1606 |
+
"step": 1225
|
1607 |
+
},
|
1608 |
+
{
|
1609 |
+
"epoch": 12.68,
|
1610 |
+
"learning_rate": 3.176727608767987e-05,
|
1611 |
+
"loss": 1.2798,
|
1612 |
+
"step": 1230
|
1613 |
+
},
|
1614 |
+
{
|
1615 |
+
"epoch": 12.73,
|
1616 |
+
"learning_rate": 2.291821556419886e-05,
|
1617 |
+
"loss": 1.2241,
|
1618 |
+
"step": 1235
|
1619 |
+
},
|
1620 |
+
{
|
1621 |
+
"epoch": 12.78,
|
1622 |
+
"learning_rate": 1.5264491223375942e-05,
|
1623 |
+
"loss": 1.1194,
|
1624 |
+
"step": 1240
|
1625 |
+
},
|
1626 |
+
{
|
1627 |
+
"epoch": 12.84,
|
1628 |
+
"learning_rate": 9.006374874854777e-06,
|
1629 |
+
"loss": 1.1957,
|
1630 |
+
"step": 1245
|
1631 |
+
},
|
1632 |
+
{
|
1633 |
+
"epoch": 12.89,
|
1634 |
+
"learning_rate": 4.3076200365648044e-06,
|
1635 |
+
"loss": 1.1553,
|
1636 |
+
"step": 1250
|
1637 |
+
},
|
1638 |
+
{
|
1639 |
+
"epoch": 12.94,
|
1640 |
+
"learning_rate": 1.2911770647808012e-06,
|
1641 |
+
"loss": 1.1644,
|
1642 |
+
"step": 1255
|
1643 |
+
},
|
1644 |
+
{
|
1645 |
+
"epoch": 12.99,
|
1646 |
+
"learning_rate": 3.597596347767558e-08,
|
1647 |
+
"loss": 1.1646,
|
1648 |
+
"step": 1260
|
1649 |
+
},
|
1650 |
+
{
|
1651 |
+
"epoch": 13.0,
|
1652 |
+
"eval_loss": 1.0746197700500488,
|
1653 |
+
"eval_runtime": 7.0141,
|
1654 |
+
"eval_samples_per_second": 22.241,
|
1655 |
+
"eval_steps_per_second": 2.851,
|
1656 |
+
"step": 1261
|
1657 |
}
|
1658 |
],
|
1659 |
+
"max_steps": 1261,
|
1660 |
+
"num_train_epochs": 13,
|
1661 |
+
"total_flos": 1307244036096000.0,
|
1662 |
"trial_name": null,
|
1663 |
"trial_params": null
|
1664 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2671
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:869572ff71fc1f7eacca418acad1f3acfe2cf9117bc54dd7f9ecad9664ccf36f
|
3 |
size 2671
|