Federic committed on
Commit e0f7fe0
1 Parent(s): 3d86f6b

Training in progress, step 275, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54c4947058fd8e7ee421b0b46fc349022d4a1f3f8246ef6c3d3b6bda09b72f50
+oid sha256:5d6c9e9fde3bf761964ce0a1096daf1a2f902a2ec3817c338700dd0fdb422b7b
 size 838904832
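The file tracked here is a Git LFS pointer: the repository commits only a small text stub (spec version, sha256 object id, byte size) while the actual checkpoint blob lives in LFS storage, so the diff above is just the old and new oid. As a rough Python sketch (the pointer paths match this commit, everything else is hypothetical), such a pointer can be parsed and a downloaded blob checked against it:

import hashlib
import os

def parse_lfs_pointer(pointer_path):
    # A Git LFS pointer is three "key value" lines: version, oid, size.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    # Compare a downloaded blob against the pointer's sha256 oid and byte size.
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]
    h = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid and os.path.getsize(blob_path) == int(fields["size"])

# Hypothetical usage: pointer text as committed here vs. the resolved LFS object.
# verify_blob("last-checkpoint/adapter_model.safetensors", "/tmp/adapter_model.safetensors")

Once resolved, adapter_model.safetensors (presumably the adapter weights, judging by the name and its 838904832-byte size) can be opened with safetensors.torch.load_file to list tensor names and shapes.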
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a651b2e845e714d6bbbfe26616bd79e994bd844b56c8ce9664fd66902226483
-size 420633876
+oid sha256:c44e3e00e2ab4d12e4e2b2418ce9291412c0dade8841637f3f38586085364aea
+size 421458386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc1f20496eb68bf92a3c0bedec6630dbaf08516bee744709362b85a6b5810eb0
+oid sha256:5cecdcf6d8c7c779a0c31f47b4cfa05311ab6c9a135282cd57f3efb2ff76f57b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
+oid sha256:1cd2eafba86db0126a84de125a9a439f555cacfcb5b82fd7e6af07740e165b9d
 size 1064
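optimizer.pt, scheduler.pt, and rng_state.pth carry the PyTorch-serialized optimizer state, learning-rate scheduler state, and RNG snapshot that let training resume exactly where step 275 left off; the diff only shows their new hashes, not their contents. A hedged sketch of peeking inside them (paths as in this commit; the exact optimizer and dict layout are assumptions, not shown in the diff):

import torch

ckpt_dir = "last-checkpoint"

# Optimizer state: param_groups plus per-parameter moment buffers.
opt_state = torch.load(f"{ckpt_dir}/optimizer.pt", map_location="cpu")
print(list(opt_state.keys()))                 # typically ['state', 'param_groups']
print(opt_state["param_groups"][0]["lr"])     # learning rate carried in the checkpoint

# LR scheduler state: a tiny dict (1064 bytes here), e.g. last_epoch / _step_count.
print(torch.load(f"{ckpt_dir}/scheduler.pt", map_location="cpu"))

# RNG snapshot so dataloader shuffling and dropout line up again on resume.
rng_state = torch.load(f"{ckpt_dir}/rng_state.pth", map_location="cpu")
print(type(rng_state))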
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
+  "epoch": 1.1,
   "eval_steps": 500,
-  "global_step": 250,
+  "global_step": 275,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1507,13 +1507,163 @@
       "learning_rate": 0.0002,
       "loss": 0.3503,
       "step": 250
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.0002,
+      "loss": 0.6092,
+      "step": 251
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 0.0002,
+      "loss": 0.5481,
+      "step": 252
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 0.0002,
+      "loss": 0.5902,
+      "step": 253
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 0.0002,
+      "loss": 0.4603,
+      "step": 254
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 0.0002,
+      "loss": 0.5344,
+      "step": 255
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 0.0002,
+      "loss": 0.5136,
+      "step": 256
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 0.0002,
+      "loss": 0.5304,
+      "step": 257
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 0.0002,
+      "loss": 0.4625,
+      "step": 258
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 0.0002,
+      "loss": 0.4841,
+      "step": 259
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 0.0002,
+      "loss": 0.4548,
+      "step": 260
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 0.0002,
+      "loss": 0.4686,
+      "step": 261
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 0.0002,
+      "loss": 0.4717,
+      "step": 262
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 0.0002,
+      "loss": 0.4775,
+      "step": 263
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.0002,
+      "loss": 0.503,
+      "step": 264
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.0002,
+      "loss": 0.4712,
+      "step": 265
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.0002,
+      "loss": 0.4782,
+      "step": 266
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 0.0002,
+      "loss": 0.4649,
+      "step": 267
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 0.0002,
+      "loss": 0.4758,
+      "step": 268
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.0002,
+      "loss": 0.4192,
+      "step": 269
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.0002,
+      "loss": 0.4511,
+      "step": 270
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.0002,
+      "loss": 0.4572,
+      "step": 271
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 0.0002,
+      "loss": 0.4277,
+      "step": 272
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 0.0002,
+      "loss": 0.4366,
+      "step": 273
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 0.0002,
+      "loss": 0.403,
+      "step": 274
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 0.0002,
+      "loss": 0.4161,
+      "step": 275
     }
   ],
   "logging_steps": 1,
-  "max_steps": 250,
-  "num_train_epochs": 1,
+  "max_steps": 500,
+  "num_train_epochs": 2,
   "save_steps": 25,
-  "total_flos": 2.990177006051328e+16,
+  "total_flos": 3.327201233842176e+16,
   "trial_name": null,
   "trial_params": null
 }
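trainer_state.json is the human-readable part of the checkpoint: global_step advances from 250 to 275, the run is now configured for 2 epochs and 500 max steps, and log_history gains one entry per step (logging_steps is 1). A small sketch of pulling the loss curve out of it with plain Python (the path matches this commit; the snippet itself is illustrative):

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])   # 275, 1.1 in this checkpoint

# Collect (step, loss) pairs from the per-step log entries.
curve = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
for step, loss in curve[-5:]:
    print(step, loss)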
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52ab7055ed2ab4ea386a14f2c54955245810bb73dc11cc1cce38f825dbf4c63e
+oid sha256:cf8ae64e171536bf78dc95ca36df03d7f40620336c1b55d8bd15bbd85cdd8bf5
 size 4600
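training_args.bin holds the serialized TrainingArguments, and the last-checkpoint/ directory as a whole is what the Hugging Face Trainer consumes when resuming. A minimal sketch, assuming a Trainer has already been built with the same model, dataset, and arguments as the original run (the trainer argument below is a placeholder, not something defined in this commit):

from transformers import Trainer

def resume_training(trainer: Trainer, checkpoint_dir: str = "last-checkpoint"):
    # Reloads the adapter weights, optimizer, scheduler, RNG state, and
    # trainer_state.json from the checkpoint, then continues training
    # from global_step 275 toward the configured max_steps of 500.
    return trainer.train(resume_from_checkpoint=checkpoint_dir)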