dada22231 committed on
Commit
9c80fad
1 Parent(s): c2b1662

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:151c18481c0c21e62724a832f1068444a8a45769016f76181dd59e90c0a94318
3
  size 156926880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c23ecfdaa0a3eba9965716bac2017962198d21f8191fdd4eb08417907ef2a6fb
3
  size 156926880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:436d871bbaa077d22e6ff94a263dabfa48f84e603ae3c45b41d4d161d45edf44
3
  size 313998650
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e01166188acec499165a3e4cac0c192028eeb10d6b74af75139c4e31da21410b
3
  size 313998650
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f626bd1418942f59d714a92696f0a14d683419a5791fd0cfc70d71476ae2116
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7e5e54e28c2ceedfc7ae6045d608c8ad4853449c00b118f443d8bdea501f46
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86902189ee90d41ff69c42cb8f5c60a081bba545d6754d914823e35ec0f3a43e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8438aa97129094cc4883fee9705d680d655c2a6ca0571fa1fbe44c7bfb092306
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fce92dc1de428f1ad8b1bd74ef21624eaa8e4dd05f15a0489214ffc2b8eb69cc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56df6166fd118dcf90038143d85adf0f7e4d80bca4b6cd96c45ad995a37a73e9
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95c2cf0e22dbb9ad638e7e759a92209ced4d5166aa65477536fe7f0537618fc1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4db6a93d2cee16ffa1a2a0696f25a5a3e2a82bef462e215ebd2653e9ca9b83f
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8879023307436182,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,189 @@
198
  "eval_samples_per_second": 33.859,
199
  "eval_steps_per_second": 8.803,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -212,12 +395,12 @@
212
  "should_evaluate": false,
213
  "should_log": false,
214
  "should_save": true,
215
- "should_training_stop": false
216
  },
217
  "attributes": {}
218
  }
219
  },
220
- "total_flos": 7.94731890081792e+16,
221
  "train_batch_size": 1,
222
  "trial_name": null,
223
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.7758046614872365,
5
  "eval_steps": 25,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 33.859,
199
  "eval_steps_per_second": 8.803,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.9234184239733629,
204
+ "grad_norm": 2.222177743911743,
205
+ "learning_rate": 5.500000000000001e-05,
206
+ "loss": 1.6803,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.9589345172031076,
211
+ "grad_norm": 2.106823444366455,
212
+ "learning_rate": 5.205685918464356e-05,
213
+ "loss": 1.7212,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.9944506104328524,
218
+ "grad_norm": 2.1512210369110107,
219
+ "learning_rate": 4.912632135009769e-05,
220
+ "loss": 1.7802,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 1.029966703662597,
225
+ "grad_norm": 4.948949337005615,
226
+ "learning_rate": 4.6220935509274235e-05,
227
+ "loss": 2.9527,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 1.065482796892342,
232
+ "grad_norm": 1.8342108726501465,
233
+ "learning_rate": 4.3353142970386564e-05,
234
+ "loss": 1.637,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 1.1009988901220866,
239
+ "grad_norm": 1.786393165588379,
240
+ "learning_rate": 4.053522406135775e-05,
241
+ "loss": 1.6031,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 1.1365149833518313,
246
+ "grad_norm": 1.9454339742660522,
247
+ "learning_rate": 3.777924554357096e-05,
248
+ "loss": 1.3323,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 1.172031076581576,
253
+ "grad_norm": 2.2301857471466064,
254
+ "learning_rate": 3.509700894014496e-05,
255
+ "loss": 1.5233,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 1.2075471698113207,
260
+ "grad_norm": 2.044020891189575,
261
+ "learning_rate": 3.250000000000001e-05,
262
+ "loss": 1.5415,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 1.2430632630410654,
267
+ "grad_norm": 2.0642783641815186,
268
+ "learning_rate": 2.9999339514117912e-05,
269
+ "loss": 1.4577,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 1.2785793562708103,
274
+ "grad_norm": 2.05415415763855,
275
+ "learning_rate": 2.760573569460757e-05,
276
+ "loss": 1.434,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 1.314095449500555,
281
+ "grad_norm": 2.0152251720428467,
282
+ "learning_rate": 2.53294383204969e-05,
283
+ "loss": 1.4434,
284
+ "step": 37
285
+ },
286
+ {
287
+ "epoch": 1.3496115427302997,
288
+ "grad_norm": 2.1848814487457275,
289
+ "learning_rate": 2.3180194846605367e-05,
290
+ "loss": 1.5678,
291
+ "step": 38
292
+ },
293
+ {
294
+ "epoch": 1.3851276359600444,
295
+ "grad_norm": 2.1174445152282715,
296
+ "learning_rate": 2.1167208663446025e-05,
297
+ "loss": 1.5425,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 1.420643729189789,
302
+ "grad_norm": 2.277348518371582,
303
+ "learning_rate": 1.9299099686894423e-05,
304
+ "loss": 1.3887,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 1.4561598224195338,
309
+ "grad_norm": 2.1913950443267822,
310
+ "learning_rate": 1.758386744638546e-05,
311
+ "loss": 1.4529,
312
+ "step": 41
313
+ },
314
+ {
315
+ "epoch": 1.4916759156492785,
316
+ "grad_norm": 2.30416202545166,
317
+ "learning_rate": 1.602885682970026e-05,
318
+ "loss": 1.4513,
319
+ "step": 42
320
+ },
321
+ {
322
+ "epoch": 1.5271920088790232,
323
+ "grad_norm": 2.0522561073303223,
324
+ "learning_rate": 1.464072663102903e-05,
325
+ "loss": 1.3965,
326
+ "step": 43
327
+ },
328
+ {
329
+ "epoch": 1.5627081021087679,
330
+ "grad_norm": 2.1286027431488037,
331
+ "learning_rate": 1.3425421036992098e-05,
332
+ "loss": 1.5099,
333
+ "step": 44
334
+ },
335
+ {
336
+ "epoch": 1.5982241953385128,
337
+ "grad_norm": 2.026170253753662,
338
+ "learning_rate": 1.2388144172720251e-05,
339
+ "loss": 1.4889,
340
+ "step": 45
341
+ },
342
+ {
343
+ "epoch": 1.6337402885682575,
344
+ "grad_norm": 2.003553628921509,
345
+ "learning_rate": 1.1533337816991932e-05,
346
+ "loss": 1.415,
347
+ "step": 46
348
+ },
349
+ {
350
+ "epoch": 1.6692563817980022,
351
+ "grad_norm": 2.1225321292877197,
352
+ "learning_rate": 1.0864662381854632e-05,
353
+ "loss": 1.3807,
354
+ "step": 47
355
+ },
356
+ {
357
+ "epoch": 1.704772475027747,
358
+ "grad_norm": 2.1674954891204834,
359
+ "learning_rate": 1.0384981238178534e-05,
360
+ "loss": 1.3571,
361
+ "step": 48
362
+ },
363
+ {
364
+ "epoch": 1.7402885682574918,
365
+ "grad_norm": 2.1650798320770264,
366
+ "learning_rate": 1.0096348454262845e-05,
367
+ "loss": 1.5067,
368
+ "step": 49
369
+ },
370
+ {
371
+ "epoch": 1.7758046614872365,
372
+ "grad_norm": 2.0062143802642822,
373
+ "learning_rate": 1e-05,
374
+ "loss": 1.3259,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 1.7758046614872365,
379
+ "eval_loss": 1.8066482543945312,
380
+ "eval_runtime": 1.4758,
381
+ "eval_samples_per_second": 33.88,
382
+ "eval_steps_per_second": 8.809,
383
+ "step": 50
384
  }
385
  ],
386
  "logging_steps": 1,
 
395
  "should_evaluate": false,
396
  "should_log": false,
397
  "should_save": true,
398
+ "should_training_stop": true
399
  },
400
  "attributes": {}
401
  }
402
  },
403
+ "total_flos": 1.589463780163584e+17,
404
  "train_batch_size": 1,
405
  "trial_name": null,
406
  "trial_params": null