auxyus committed (verified)
Commit 734c135 · 1 Parent(s): a56a629

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5a439a6cef4ecb4cfb0552ccc4bb8f183dbaa21404b66a6d87e683af193576a
+ oid sha256:f732bef2952e9d32f19e762db18002aed6c12f6c825354edc05263531b046467
  size 335604696
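
Note: the entry above is a Git LFS pointer to the adapter weights, not the weights themselves. A minimal sketch (not part of this commit) of inspecting the real ~335 MB file once it has been fetched, e.g. with git lfs pull; the path simply mirrors the repo layout:

# Sketch: list the adapter tensors stored in the checkpoint.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/adapter_model.safetensors")

total = 0
for name, tensor in state_dict.items():
    total += tensor.numel()
    print(f"{name}: {tuple(tensor.shape)} {tensor.dtype}")

print(f"total adapter parameters: {total:,}")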
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7ff994df58fd21bbc62e850340ee2f3ff5c1ed1827118259fd6cb1c9c64e4ac9
- size 170920084
+ oid sha256:2feeebd9398df71afacce59452b2c73c4adb2295cd53299eb4c2ded241ccce8f
+ size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ddf89d8c106de4f39d61df2d30ed6ada0e5dc66c4de0a061bc16e3b4bc537498
+ oid sha256:574041858ad47610f1228962be219c1774ebe5acbf20c9a7bf53d14a3ca80f21
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:596785cc644037bdf9b1374ba5340995054de5f4bde563878d8bc4f03a7aa10e
+ oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
  size 1064
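
All four files above follow the same LFS pointer format: a version line, an "oid sha256:<hash>" line, and a "size <bytes>" line, while the actual blob lives in LFS storage. A small sketch (nothing here is specific to this repo) of checking a downloaded blob against its pointer:

# Sketch: verify a downloaded file against a Git LFS pointer by recomputing
# its SHA-256 digest and byte size. Usage: python verify_lfs.py <pointer> <blob>
import hashlib
import os
import sys

def read_pointer(path):
    # Pull the oid and size fields out of the pointer file.
    oid, size = None, None
    with open(path) as f:
        for line in f:
            if line.startswith("oid sha256:"):
                oid = line.split("oid sha256:", 1)[1].strip()
            elif line.startswith("size "):
                size = int(line.split()[1])
    return oid, size

def sha256_of(path, chunk_size=1 << 20):
    # Stream in chunks so large blobs (e.g. the 335 MB adapter) are not read at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

if __name__ == "__main__":
    pointer_path, blob_path = sys.argv[1], sys.argv[2]
    oid, size = read_pointer(pointer_path)
    ok = sha256_of(blob_path) == oid and os.path.getsize(blob_path) == size
    print("match" if ok else "MISMATCH")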
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.770531177520752,
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
- "epoch": 0.03910833007430583,
+ "best_metric": 0.6793892979621887,
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
+ "epoch": 0.07821666014861166,
  "eval_steps": 50,
- "global_step": 150,
+ "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -144,6 +144,135 @@
  "eval_samples_per_second": 12.57,
  "eval_steps_per_second": 3.143,
  "step": 150
+ },
+ {
+ "epoch": 0.041715552079259546,
+ "grad_norm": 10.44023609161377,
+ "learning_rate": 0.0001697631521134985,
+ "loss": 2.5952,
+ "step": 160
+ },
+ {
+ "epoch": 0.04432277408421327,
+ "grad_norm": 13.444787979125977,
+ "learning_rate": 0.00016585113790650388,
+ "loss": 2.6771,
+ "step": 170
+ },
+ {
+ "epoch": 0.046929996089166995,
+ "grad_norm": 15.03112506866455,
+ "learning_rate": 0.0001617524614946192,
+ "loss": 3.0433,
+ "step": 180
+ },
+ {
+ "epoch": 0.04953721809412071,
+ "grad_norm": 13.681419372558594,
+ "learning_rate": 0.0001574787410214407,
+ "loss": 3.3614,
+ "step": 190
+ },
+ {
+ "epoch": 0.052144440099074436,
+ "grad_norm": 25.215314865112305,
+ "learning_rate": 0.00015304209081197425,
+ "loss": 3.507,
+ "step": 200
+ },
+ {
+ "epoch": 0.052144440099074436,
+ "eval_loss": 0.7421609163284302,
+ "eval_runtime": 513.024,
+ "eval_samples_per_second": 12.592,
+ "eval_steps_per_second": 3.148,
+ "step": 200
+ },
+ {
+ "epoch": 0.05475166210402816,
+ "grad_norm": 9.96532917022705,
+ "learning_rate": 0.00014845508703326504,
+ "loss": 2.2643,
+ "step": 210
+ },
+ {
+ "epoch": 0.05735888410898188,
+ "grad_norm": 12.444780349731445,
+ "learning_rate": 0.00014373073204588556,
+ "loss": 2.6161,
+ "step": 220
+ },
+ {
+ "epoch": 0.0599661061139356,
+ "grad_norm": 13.275497436523438,
+ "learning_rate": 0.00013888241754733208,
+ "loss": 2.9035,
+ "step": 230
+ },
+ {
+ "epoch": 0.06257332811888933,
+ "grad_norm": 22.568071365356445,
+ "learning_rate": 0.00013392388661180303,
+ "loss": 3.1491,
+ "step": 240
+ },
+ {
+ "epoch": 0.06518055012384305,
+ "grad_norm": 18.894039154052734,
+ "learning_rate": 0.0001288691947339621,
+ "loss": 3.6063,
+ "step": 250
+ },
+ {
+ "epoch": 0.06518055012384305,
+ "eval_loss": 0.7025501132011414,
+ "eval_runtime": 512.1493,
+ "eval_samples_per_second": 12.614,
+ "eval_steps_per_second": 3.153,
+ "step": 250
+ },
+ {
+ "epoch": 0.06778777212879676,
+ "grad_norm": 9.569221496582031,
+ "learning_rate": 0.0001237326699871115,
+ "loss": 2.3234,
+ "step": 260
+ },
+ {
+ "epoch": 0.07039499413375049,
+ "grad_norm": 12.406023979187012,
+ "learning_rate": 0.00011852887240871145,
+ "loss": 2.2466,
+ "step": 270
+ },
+ {
+ "epoch": 0.07300221613870421,
+ "grad_norm": 14.062081336975098,
+ "learning_rate": 0.00011327255272837221,
+ "loss": 2.8102,
+ "step": 280
+ },
+ {
+ "epoch": 0.07560943814365793,
+ "grad_norm": 12.484160423278809,
+ "learning_rate": 0.00010797861055530831,
+ "loss": 2.9179,
+ "step": 290
+ },
+ {
+ "epoch": 0.07821666014861166,
+ "grad_norm": 19.24785804748535,
+ "learning_rate": 0.00010266205214377748,
+ "loss": 3.4329,
+ "step": 300
+ },
+ {
+ "epoch": 0.07821666014861166,
+ "eval_loss": 0.6793892979621887,
+ "eval_runtime": 512.6051,
+ "eval_samples_per_second": 12.602,
+ "eval_steps_per_second": 3.151,
+ "step": 300
  }
  ],
  "logging_steps": 10,
@@ -172,7 +301,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.2349209733903155e+17,
+ "total_flos": 4.487523283595428e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null