error577 commited on
Commit
1c1b2b6
·
verified ·
1 Parent(s): e72ca38

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b744b1ce429a59148cc3edb86a39988f0c7f82f2f4cec917ab4700516a1d2d4d
3
  size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b81c89bf5a89dea220e294a4652d890349bf12553800a49024591d26ca2757a4
3
  size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4435c9dd46ee360c2866c624c80beecd8c54ccd488b1f5ad0e271d9c35a6382a
3
  size 164464564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4536e6d49005fda0a95151faebf86801ba2e3b8da30b27d77d184347ee694363
3
  size 164464564
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7726f50d6970cd5b957fbd3f859f311c8be3d0ced4fed6a04709ea0c71181063
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:356dcfdc03c399d2e663c95cf1133f32813c707adc33ad61bc750dcc5222213f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47345b2adedc866297f19eccd74a7db75a3d504bcbba513f5e3ae09a2efa0798
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517cde929c0b918b0e53e0ffd764ecc43637194fb41b83640993bbae7c21d100
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.42559075355529785,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 0.33948652662847445,
5
  "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1447,6 +1447,364 @@
1447
  "eval_samples_per_second": 2.941,
1448
  "eval_steps_per_second": 2.941,
1449
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
  }
1451
  ],
1452
  "logging_steps": 1,
@@ -1475,7 +1833,7 @@
1475
  "attributes": {}
1476
  }
1477
  },
1478
- "total_flos": 1.29551053519061e+17,
1479
  "train_batch_size": 1,
1480
  "trial_name": null,
1481
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.39342138171195984,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
+ "epoch": 0.42435815828559303,
5
  "eval_steps": 50,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1447
  "eval_samples_per_second": 2.941,
1448
  "eval_steps_per_second": 2.941,
1449
  "step": 200
1450
+ },
1451
+ {
1452
+ "epoch": 0.3411839592616168,
1453
+ "grad_norm": 0.5887985825538635,
1454
+ "learning_rate": 0.0002732809457224292,
1455
+ "loss": 0.9434,
1456
+ "step": 201
1457
+ },
1458
+ {
1459
+ "epoch": 0.3428813918947592,
1460
+ "grad_norm": 0.38302966952323914,
1461
+ "learning_rate": 0.00027300916279262866,
1462
+ "loss": 1.0988,
1463
+ "step": 202
1464
+ },
1465
+ {
1466
+ "epoch": 0.34457882452790156,
1467
+ "grad_norm": 0.2614414095878601,
1468
+ "learning_rate": 0.0002727361411619245,
1469
+ "loss": 0.6772,
1470
+ "step": 203
1471
+ },
1472
+ {
1473
+ "epoch": 0.34627625716104393,
1474
+ "grad_norm": 0.3063081204891205,
1475
+ "learning_rate": 0.0002724618835796414,
1476
+ "loss": 0.7314,
1477
+ "step": 204
1478
+ },
1479
+ {
1480
+ "epoch": 0.3479736897941863,
1481
+ "grad_norm": 0.18658895790576935,
1482
+ "learning_rate": 0.0002721863928075503,
1483
+ "loss": 0.514,
1484
+ "step": 205
1485
+ },
1486
+ {
1487
+ "epoch": 0.3496711224273287,
1488
+ "grad_norm": 0.282010018825531,
1489
+ "learning_rate": 0.0002719096716198402,
1490
+ "loss": 0.892,
1491
+ "step": 206
1492
+ },
1493
+ {
1494
+ "epoch": 0.35136855506047104,
1495
+ "grad_norm": 0.17541489005088806,
1496
+ "learning_rate": 0.00027163172280309026,
1497
+ "loss": 0.5047,
1498
+ "step": 207
1499
+ },
1500
+ {
1501
+ "epoch": 0.3530659876936134,
1502
+ "grad_norm": 0.21916832029819489,
1503
+ "learning_rate": 0.0002713525491562421,
1504
+ "loss": 0.7146,
1505
+ "step": 208
1506
+ },
1507
+ {
1508
+ "epoch": 0.3547634203267558,
1509
+ "grad_norm": 0.20561501383781433,
1510
+ "learning_rate": 0.0002710721534905712,
1511
+ "loss": 0.6104,
1512
+ "step": 209
1513
+ },
1514
+ {
1515
+ "epoch": 0.35646085295989816,
1516
+ "grad_norm": 0.1947142332792282,
1517
+ "learning_rate": 0.00027079053862965875,
1518
+ "loss": 0.4924,
1519
+ "step": 210
1520
+ },
1521
+ {
1522
+ "epoch": 0.3581582855930405,
1523
+ "grad_norm": 0.23798146843910217,
1524
+ "learning_rate": 0.00027050770740936336,
1525
+ "loss": 0.6153,
1526
+ "step": 211
1527
+ },
1528
+ {
1529
+ "epoch": 0.3598557182261829,
1530
+ "grad_norm": 0.21775560081005096,
1531
+ "learning_rate": 0.00027022366267779224,
1532
+ "loss": 0.4658,
1533
+ "step": 212
1534
+ },
1535
+ {
1536
+ "epoch": 0.36155315085932527,
1537
+ "grad_norm": 0.24994409084320068,
1538
+ "learning_rate": 0.0002699384072952727,
1539
+ "loss": 0.5979,
1540
+ "step": 213
1541
+ },
1542
+ {
1543
+ "epoch": 0.36325058349246764,
1544
+ "grad_norm": 0.28469640016555786,
1545
+ "learning_rate": 0.0002696519441343233,
1546
+ "loss": 0.8796,
1547
+ "step": 214
1548
+ },
1549
+ {
1550
+ "epoch": 0.36494801612561,
1551
+ "grad_norm": 0.2747570276260376,
1552
+ "learning_rate": 0.0002693642760796248,
1553
+ "loss": 0.8625,
1554
+ "step": 215
1555
+ },
1556
+ {
1557
+ "epoch": 0.3666454487587524,
1558
+ "grad_norm": 0.2469591647386551,
1559
+ "learning_rate": 0.00026907540602799136,
1560
+ "loss": 0.6991,
1561
+ "step": 216
1562
+ },
1563
+ {
1564
+ "epoch": 0.36834288139189475,
1565
+ "grad_norm": 0.20425763726234436,
1566
+ "learning_rate": 0.00026878533688834123,
1567
+ "loss": 0.5774,
1568
+ "step": 217
1569
+ },
1570
+ {
1571
+ "epoch": 0.3700403140250371,
1572
+ "grad_norm": 0.2737872898578644,
1573
+ "learning_rate": 0.0002684940715816674,
1574
+ "loss": 0.9062,
1575
+ "step": 218
1576
+ },
1577
+ {
1578
+ "epoch": 0.3717377466581795,
1579
+ "grad_norm": 0.2064121514558792,
1580
+ "learning_rate": 0.00026820161304100823,
1581
+ "loss": 0.5054,
1582
+ "step": 219
1583
+ },
1584
+ {
1585
+ "epoch": 0.37343517929132186,
1586
+ "grad_norm": 0.14863868057727814,
1587
+ "learning_rate": 0.00026790796421141813,
1588
+ "loss": 0.285,
1589
+ "step": 220
1590
+ },
1591
+ {
1592
+ "epoch": 0.37513261192446423,
1593
+ "grad_norm": 0.12498918920755386,
1594
+ "learning_rate": 0.00026761312804993734,
1595
+ "loss": 0.1999,
1596
+ "step": 221
1597
+ },
1598
+ {
1599
+ "epoch": 0.3768300445576066,
1600
+ "grad_norm": 0.1726280152797699,
1601
+ "learning_rate": 0.0002673171075255629,
1602
+ "loss": 0.2852,
1603
+ "step": 222
1604
+ },
1605
+ {
1606
+ "epoch": 0.378527477190749,
1607
+ "grad_norm": 0.1533537358045578,
1608
+ "learning_rate": 0.0002670199056192181,
1609
+ "loss": 0.3106,
1610
+ "step": 223
1611
+ },
1612
+ {
1613
+ "epoch": 0.38022490982389134,
1614
+ "grad_norm": 0.125217467546463,
1615
+ "learning_rate": 0.00026672152532372287,
1616
+ "loss": 0.1804,
1617
+ "step": 224
1618
+ },
1619
+ {
1620
+ "epoch": 0.3819223424570337,
1621
+ "grad_norm": 0.05522383376955986,
1622
+ "learning_rate": 0.0002664219696437635,
1623
+ "loss": 0.0442,
1624
+ "step": 225
1625
+ },
1626
+ {
1627
+ "epoch": 0.3836197750901761,
1628
+ "grad_norm": 0.04138198867440224,
1629
+ "learning_rate": 0.00026612124159586237,
1630
+ "loss": 0.023,
1631
+ "step": 226
1632
+ },
1633
+ {
1634
+ "epoch": 0.38531720772331846,
1635
+ "grad_norm": 0.05575822666287422,
1636
+ "learning_rate": 0.0002658193442083475,
1637
+ "loss": 0.0024,
1638
+ "step": 227
1639
+ },
1640
+ {
1641
+ "epoch": 0.3870146403564608,
1642
+ "grad_norm": 0.12629126012325287,
1643
+ "learning_rate": 0.0002655162805213223,
1644
+ "loss": 0.1524,
1645
+ "step": 228
1646
+ },
1647
+ {
1648
+ "epoch": 0.38871207298960325,
1649
+ "grad_norm": 0.02942221239209175,
1650
+ "learning_rate": 0.00026521205358663477,
1651
+ "loss": 0.0096,
1652
+ "step": 229
1653
+ },
1654
+ {
1655
+ "epoch": 0.3904095056227456,
1656
+ "grad_norm": 0.0953650251030922,
1657
+ "learning_rate": 0.00026490666646784665,
1658
+ "loss": 0.0043,
1659
+ "step": 230
1660
+ },
1661
+ {
1662
+ "epoch": 0.392106938255888,
1663
+ "grad_norm": 0.005734459031373262,
1664
+ "learning_rate": 0.00026460012224020297,
1665
+ "loss": 0.0003,
1666
+ "step": 231
1667
+ },
1668
+ {
1669
+ "epoch": 0.39380437088903036,
1670
+ "grad_norm": 0.010758363641798496,
1671
+ "learning_rate": 0.0002642924239906006,
1672
+ "loss": 0.0003,
1673
+ "step": 232
1674
+ },
1675
+ {
1676
+ "epoch": 0.39550180352217273,
1677
+ "grad_norm": 0.01772010512650013,
1678
+ "learning_rate": 0.0002639835748175575,
1679
+ "loss": 0.0007,
1680
+ "step": 233
1681
+ },
1682
+ {
1683
+ "epoch": 0.3971992361553151,
1684
+ "grad_norm": 0.005056055262684822,
1685
+ "learning_rate": 0.0002636735778311815,
1686
+ "loss": 0.0002,
1687
+ "step": 234
1688
+ },
1689
+ {
1690
+ "epoch": 0.3988966687884575,
1691
+ "grad_norm": 0.24263891577720642,
1692
+ "learning_rate": 0.00026336243615313873,
1693
+ "loss": 0.0008,
1694
+ "step": 235
1695
+ },
1696
+ {
1697
+ "epoch": 0.40059410142159985,
1698
+ "grad_norm": 0.0014849180588498712,
1699
+ "learning_rate": 0.0002630501529166224,
1700
+ "loss": 0.0001,
1701
+ "step": 236
1702
+ },
1703
+ {
1704
+ "epoch": 0.4022915340547422,
1705
+ "grad_norm": 0.0037826071493327618,
1706
+ "learning_rate": 0.00026273673126632133,
1707
+ "loss": 0.0002,
1708
+ "step": 237
1709
+ },
1710
+ {
1711
+ "epoch": 0.4039889666878846,
1712
+ "grad_norm": 0.08331254124641418,
1713
+ "learning_rate": 0.0002624221743583881,
1714
+ "loss": 0.0016,
1715
+ "step": 238
1716
+ },
1717
+ {
1718
+ "epoch": 0.40568639932102696,
1719
+ "grad_norm": 0.002364553976804018,
1720
+ "learning_rate": 0.0002621064853604071,
1721
+ "loss": 0.0001,
1722
+ "step": 239
1723
+ },
1724
+ {
1725
+ "epoch": 0.40738383195416933,
1726
+ "grad_norm": 0.014542756602168083,
1727
+ "learning_rate": 0.0002617896674513632,
1728
+ "loss": 0.0002,
1729
+ "step": 240
1730
+ },
1731
+ {
1732
+ "epoch": 0.4090812645873117,
1733
+ "grad_norm": 0.0031418628059327602,
1734
+ "learning_rate": 0.00026147172382160914,
1735
+ "loss": 0.0001,
1736
+ "step": 241
1737
+ },
1738
+ {
1739
+ "epoch": 0.41077869722045407,
1740
+ "grad_norm": 0.11094752699136734,
1741
+ "learning_rate": 0.00026115265767283374,
1742
+ "loss": 0.0031,
1743
+ "step": 242
1744
+ },
1745
+ {
1746
+ "epoch": 0.41247612985359644,
1747
+ "grad_norm": 0.012769564054906368,
1748
+ "learning_rate": 0.0002608324722180296,
1749
+ "loss": 0.0005,
1750
+ "step": 243
1751
+ },
1752
+ {
1753
+ "epoch": 0.4141735624867388,
1754
+ "grad_norm": 0.055052801966667175,
1755
+ "learning_rate": 0.0002605111706814607,
1756
+ "loss": 0.0023,
1757
+ "step": 244
1758
+ },
1759
+ {
1760
+ "epoch": 0.4158709951198812,
1761
+ "grad_norm": 0.003668338293209672,
1762
+ "learning_rate": 0.00026018875629862996,
1763
+ "loss": 0.0002,
1764
+ "step": 245
1765
+ },
1766
+ {
1767
+ "epoch": 0.41756842775302355,
1768
+ "grad_norm": 0.009973675012588501,
1769
+ "learning_rate": 0.0002598652323162466,
1770
+ "loss": 0.0003,
1771
+ "step": 246
1772
+ },
1773
+ {
1774
+ "epoch": 0.4192658603861659,
1775
+ "grad_norm": 0.02005830593407154,
1776
+ "learning_rate": 0.0002595406019921936,
1777
+ "loss": 0.0008,
1778
+ "step": 247
1779
+ },
1780
+ {
1781
+ "epoch": 0.4209632930193083,
1782
+ "grad_norm": 0.02860446274280548,
1783
+ "learning_rate": 0.0002592148685954946,
1784
+ "loss": 0.0024,
1785
+ "step": 248
1786
+ },
1787
+ {
1788
+ "epoch": 0.42266072565245066,
1789
+ "grad_norm": 0.03582284599542618,
1790
+ "learning_rate": 0.0002588880354062814,
1791
+ "loss": 0.0014,
1792
+ "step": 249
1793
+ },
1794
+ {
1795
+ "epoch": 0.42435815828559303,
1796
+ "grad_norm": 0.03657930716872215,
1797
+ "learning_rate": 0.0002585601057157605,
1798
+ "loss": 0.0023,
1799
+ "step": 250
1800
+ },
1801
+ {
1802
+ "epoch": 0.42435815828559303,
1803
+ "eval_loss": 0.39342138171195984,
1804
+ "eval_runtime": 65.6277,
1805
+ "eval_samples_per_second": 2.941,
1806
+ "eval_steps_per_second": 2.941,
1807
+ "step": 250
1808
  }
1809
  ],
1810
  "logging_steps": 1,
 
1833
  "attributes": {}
1834
  }
1835
  },
1836
+ "total_flos": 1.6186741772569805e+17,
1837
  "train_batch_size": 1,
1838
  "trial_name": null,
1839
  "trial_params": null