smirki committed on
Commit
a2080b8
·
verified ·
1 Parent(s): d7b9ee1

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b8879fe095f0f5a0bc15de492b77df156859ad8ecdf5a7085278229661e4e78
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1701d1397a8f6bba03a038aad0ad88dddbe56212ed6ac753bf48dccf50090e24
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4b4a1552fc2fdd814013dbc89ab5192839e5c2df5e80659c707de2b136e30c5
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5cbf4360926fd5a69ab224acee68af41a01edad72a7837d83695317fff4262
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e08deeaeeb196d6a8f41f78a6bc82f6af2fe7090aa1bcee32f60529b5b318530
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7882cf9d1800e045d97afc34ed2d790cd5f0da147adeb6824c51ec77a35e0c5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f92d50505b95f6ce6845ff480e774269016727df3369556b5afce1e371e58a3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47e7c3293120b0e1021fffede4430570f0c03435609ec93915f9f3961852aa6d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0003982128208599804,
5
  "eval_steps": 500,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -67,6 +67,66 @@
67
  "reward_std": 1.2611359059810638,
68
  "rewards/custom_reward_logic_v2": -3.831325000524521,
69
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
71
  ],
72
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0007964256417199608,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
67
  "reward_std": 1.2611359059810638,
68
  "rewards/custom_reward_logic_v2": -3.831325000524521,
69
  "step": 50
70
+ },
71
+ {
72
+ "completion_length": 860.95,
73
+ "epoch": 0.0004778553850319765,
74
+ "grad_norm": 0.16106949746608734,
75
+ "kl": 0.0007545762317022308,
76
+ "learning_rate": 3.7500000000000005e-06,
77
+ "loss": 0.0,
78
+ "reward": -3.99547501206398,
79
+ "reward_std": 1.233138319849968,
80
+ "rewards/custom_reward_logic_v2": -3.99547501206398,
81
+ "step": 60
82
+ },
83
+ {
84
+ "completion_length": 831.175,
85
+ "epoch": 0.0005574979492039726,
86
+ "grad_norm": 0.1723652333021164,
87
+ "kl": 0.0007971685263328254,
88
+ "learning_rate": 4.3750000000000005e-06,
89
+ "loss": 0.0,
90
+ "reward": -4.036549943685531,
91
+ "reward_std": 1.5394920334219933,
92
+ "rewards/custom_reward_logic_v2": -4.036549943685531,
93
+ "step": 70
94
+ },
95
+ {
96
+ "completion_length": 874.325,
97
+ "epoch": 0.0006371405133759687,
98
+ "grad_norm": 0.2079666703939438,
99
+ "kl": 0.0008876581850927323,
100
+ "learning_rate": 5e-06,
101
+ "loss": 0.0,
102
+ "reward": -3.92242501154542,
103
+ "reward_std": 1.2604085817933082,
104
+ "rewards/custom_reward_logic_v2": -3.92242501154542,
105
+ "step": 80
106
+ },
107
+ {
108
+ "completion_length": 791.91875,
109
+ "epoch": 0.0007167830775479647,
110
+ "grad_norm": 0.16253575682640076,
111
+ "kl": 0.0010255174711346626,
112
+ "learning_rate": 4.997620553954645e-06,
113
+ "loss": 0.0,
114
+ "reward": -3.364587500691414,
115
+ "reward_std": 1.2228698313236237,
116
+ "rewards/custom_reward_logic_v2": -3.364587500691414,
117
+ "step": 90
118
+ },
119
+ {
120
+ "completion_length": 846.2875,
121
+ "epoch": 0.0007964256417199608,
122
+ "grad_norm": 0.18019770085811615,
123
+ "kl": 0.0013353260728763416,
124
+ "learning_rate": 4.990486745229364e-06,
125
+ "loss": 0.0001,
126
+ "reward": -3.805912530422211,
127
+ "reward_std": 1.2458222389221192,
128
+ "rewards/custom_reward_logic_v2": -3.805912530422211,
129
+ "step": 100
130
  }
131
  ],
132
  "logging_steps": 10,