mgh6 committed on
Commit
41c1f83
1 Parent(s): c6c0a06

Training in progress, step 10240, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04079bfefeee402afc5a90d5c17f6f80721f862aaa72a88da6fe8812bbc9936a
3
  size 3246333568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f4d92da286543c9bdb8ad539649d6985177e4b3c3961cba55582df3b0df187
3
  size 3246333568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14807f80ce3bbc41ea4a5a0a8585f9f94fdf7d942abd7afa637ea124d5cb75fe
3
  size 6220672307
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27d09ec44e8186d29aaaccf802b9b06f8694f3099ca7a5d0e4d4230346284452
3
  size 6220672307
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb732be4c200b0b68d66033f545d69b981d9d12cd9b1fae529a2b5f11bc8689a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b60dc5f90831a3cb044b4ac92382bb3956ae1e5aaac38365110e8ca62c577d1
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d01bd9a4022a13f67fdd7d636c82f1f80d3271c4bc9442507442a46e90b7e21a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7894a9112e4a5e884676aae0344ab1825a867cb653f81b5c319da7c319adf295
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 8431.3798828125,
3
- "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-7680",
4
- "epoch": 0.5674335269724873,
5
  "eval_steps": 512,
6
- "global_step": 7680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,6 +307,106 @@
307
  "eval_samples_per_second": 65.427,
308
  "eval_steps_per_second": 65.427,
309
  "step": 7680
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 256,
 
1
  {
2
+ "best_metric": 8206.45703125,
3
+ "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-10240",
4
+ "epoch": 0.7565780359633163,
5
  "eval_steps": 512,
6
+ "global_step": 10240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "eval_samples_per_second": 65.427,
308
  "eval_steps_per_second": 65.427,
309
  "step": 7680
310
+ },
311
+ {
312
+ "epoch": 0.59,
313
+ "learning_rate": 4.136249445840107e-05,
314
+ "loss": 9255.4785,
315
+ "step": 7936
316
+ },
317
+ {
318
+ "epoch": 0.61,
319
+ "learning_rate": 3.947096202157529e-05,
320
+ "loss": 9076.7949,
321
+ "step": 8192
322
+ },
323
+ {
324
+ "epoch": 0.61,
325
+ "eval_loss": 8366.28515625,
326
+ "eval_runtime": 54.9213,
327
+ "eval_samples_per_second": 61.87,
328
+ "eval_steps_per_second": 61.87,
329
+ "step": 8192
330
+ },
331
+ {
332
+ "epoch": 0.62,
333
+ "learning_rate": 3.757942958474952e-05,
334
+ "loss": 9096.5859,
335
+ "step": 8448
336
+ },
337
+ {
338
+ "epoch": 0.64,
339
+ "learning_rate": 3.5687897147923746e-05,
340
+ "loss": 9114.2637,
341
+ "step": 8704
342
+ },
343
+ {
344
+ "epoch": 0.64,
345
+ "eval_loss": 8331.78125,
346
+ "eval_runtime": 52.2996,
347
+ "eval_samples_per_second": 64.972,
348
+ "eval_steps_per_second": 64.972,
349
+ "step": 8704
350
+ },
351
+ {
352
+ "epoch": 0.66,
353
+ "learning_rate": 3.379636471109798e-05,
354
+ "loss": 9127.2627,
355
+ "step": 8960
356
+ },
357
+ {
358
+ "epoch": 0.68,
359
+ "learning_rate": 3.19048322742722e-05,
360
+ "loss": 9108.6582,
361
+ "step": 9216
362
+ },
363
+ {
364
+ "epoch": 0.68,
365
+ "eval_loss": 8280.7763671875,
366
+ "eval_runtime": 52.2791,
367
+ "eval_samples_per_second": 64.997,
368
+ "eval_steps_per_second": 64.997,
369
+ "step": 9216
370
+ },
371
+ {
372
+ "epoch": 0.7,
373
+ "learning_rate": 3.0013299837446435e-05,
374
+ "loss": 9030.9062,
375
+ "step": 9472
376
+ },
377
+ {
378
+ "epoch": 0.72,
379
+ "learning_rate": 2.8121767400620657e-05,
380
+ "loss": 9014.1484,
381
+ "step": 9728
382
+ },
383
+ {
384
+ "epoch": 0.72,
385
+ "eval_loss": 8238.181640625,
386
+ "eval_runtime": 52.4655,
387
+ "eval_samples_per_second": 64.766,
388
+ "eval_steps_per_second": 64.766,
389
+ "step": 9728
390
+ },
391
+ {
392
+ "epoch": 0.74,
393
+ "learning_rate": 2.6230234963794885e-05,
394
+ "loss": 8962.5713,
395
+ "step": 9984
396
+ },
397
+ {
398
+ "epoch": 0.76,
399
+ "learning_rate": 2.4338702526969114e-05,
400
+ "loss": 8950.7715,
401
+ "step": 10240
402
+ },
403
+ {
404
+ "epoch": 0.76,
405
+ "eval_loss": 8206.45703125,
406
+ "eval_runtime": 52.6843,
407
+ "eval_samples_per_second": 64.497,
408
+ "eval_steps_per_second": 64.497,
409
+ "step": 10240
410
  }
411
  ],
412
  "logging_steps": 256,