smirki commited on
Commit
585d282
·
verified ·
1 Parent(s): 0a8c63d

Training in progress, step 1550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684ffe225c9700091e981d7024210dc263a03c53399f30fe5949b6f253fdbcc7
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4620c952d9d47f51b52dbaa4d676d99f0fa4424c13c706daf27aef599e4f2780
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a687432a9d28051c1c39f8f0f4a0e50ee514d1a0fd3655eab260eb68945b822d
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dcd3d1e5ce9aeb2f3a0a439162a6354e3a87bdd42c2d824aa351222059be0bd
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99aee5f4fee1e5dcba147a263eab258ec9ed62b758fd41d69b67bd6b96aba2c6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3862494dcb8330500216500c3c10e90221d897be72203e9a311c19db30c18205
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8bce7973cc00369755cc2848c8e4a859027c75254c0a4740d15b2cd113fceee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfbb7156838856bc44aea3d88f369f30c1abce4e6c21899507c2f357e97b51c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012145491036229403,
5
  "eval_steps": 500,
6
- "global_step": 1525,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1831,6 +1831,42 @@
1831
  "reward_std": 0.3324665643274784,
1832
  "rewards/custom_reward_simplified_v7_dblog": 0.790625,
1833
  "step": 1520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1834
  }
1835
  ],
1836
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.012344597446659392,
5
  "eval_steps": 500,
6
+ "global_step": 1550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1831
  "reward_std": 0.3324665643274784,
1832
  "rewards/custom_reward_simplified_v7_dblog": 0.790625,
1833
  "step": 1520
1834
+ },
1835
+ {
1836
+ "completion_length": 674.3625,
1837
+ "epoch": 0.0121853123183154,
1838
+ "grad_norm": 0.2344941943883896,
1839
+ "kl": 0.012514100456610323,
1840
+ "learning_rate": 1.7482355012393177e-06,
1841
+ "loss": 0.0005,
1842
+ "reward": 0.859375,
1843
+ "reward_std": 0.3387090668082237,
1844
+ "rewards/custom_reward_simplified_v7_dblog": 0.859375,
1845
+ "step": 1530
1846
+ },
1847
+ {
1848
+ "completion_length": 718.6,
1849
+ "epoch": 0.012264954882487397,
1850
+ "grad_norm": 0.2631664276123047,
1851
+ "kl": 0.014576551388017833,
1852
+ "learning_rate": 1.7136381096209665e-06,
1853
+ "loss": 0.0006,
1854
+ "reward": 0.653125,
1855
+ "reward_std": 0.24619419425725936,
1856
+ "rewards/custom_reward_simplified_v7_dblog": 0.653125,
1857
+ "step": 1540
1858
+ },
1859
+ {
1860
+ "completion_length": 706.28125,
1861
+ "epoch": 0.012344597446659392,
1862
+ "grad_norm": 0.20134921371936798,
1863
+ "kl": 0.012202254333533346,
1864
+ "learning_rate": 1.6792070619660977e-06,
1865
+ "loss": 0.0005,
1866
+ "reward": 0.84375,
1867
+ "reward_std": 0.3321776181459427,
1868
+ "rewards/custom_reward_simplified_v7_dblog": 0.84375,
1869
+ "step": 1550
1870
  }
1871
  ],
1872
  "logging_steps": 10,