mgh6 committed
Commit fdacb34
1 Parent(s): 95468c9

Training in progress, step 7680, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:869b484e58461f1536cc21d599303cb514c3d752e29ef44231f6d6c363605817
+oid sha256:4ca37dad208975487ae890dd2447d9fd111a4d500fddec1a394f9efeef88557e
 size 4725595416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14a089b06ff32225e1d8e28e6b6c1cab9f01e70af7cbc2eac64ef56b2989354d
+oid sha256:f0b8169b03af949ee4c5ddf91eed20622e053b6dde7d912b4c3311f20f6495a7
 size 9179193343
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc1239486b7f83e4a2231cde24a50b503b22ee79d6ee232760274da141c18674
+oid sha256:cb732be4c200b0b68d66033f545d69b981d9d12cd9b1fae529a2b5f11bc8689a
 size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ce8999a5d1b14a256d594f72ba3d10015736fefa5fd7057a03491428983da78
+oid sha256:7802dd5061761c471fafcb314e7fa5bea1fb541e8e1ce0821e89ee84f8e88b84
 size 623
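
The four checkpoint files above (model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt) are stored as Git LFS pointers, so each diff only swaps the oid sha256 line while the recorded size stays the same. A minimal sketch, assuming the pointer and the downloaded blob sit at hypothetical local paths, of how the recorded digest and size could be checked against the actual file; this is illustration, not part of the repository:

import hashlib

def lfs_fields(pointer_path):
    # Parse the "version", "oid", and "size" fields of a Git LFS pointer file.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    # Check that blob_path has the sha256 digest and byte size recorded in the pointer.
    fields = lfs_fields(pointer_path)
    expected = fields["oid"].split(":", 1)[1]
    sha = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected and size == int(fields["size"])

# Hypothetical local paths; adjust to wherever the checkpoint was downloaded.
print(verify_blob("model.safetensors.pointer", "last-checkpoint/model.safetensors"))

Reading in 1 MiB chunks keeps memory flat even for the roughly 4.7 GB weights file and 9.2 GB optimizer state.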
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 4288.33056640625,
-  "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-5120",
-  "epoch": 0.37828901798165815,
+  "best_metric": 3752.7509765625,
+  "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-7680",
+  "epoch": 0.5674335269724873,
   "eval_steps": 512,
-  "global_step": 5120,
+  "global_step": 7680,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -207,6 +207,106 @@
       "eval_samples_per_second": 66.616,
       "eval_steps_per_second": 66.616,
       "step": 5120
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 9.205556376533177e-05,
+      "loss": 5085.8599,
+      "step": 5376
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 9.167725727796661e-05,
+      "loss": 5071.4478,
+      "step": 5632
+    },
+    {
+      "epoch": 0.42,
+      "eval_loss": 4449.0048828125,
+      "eval_runtime": 55.4547,
+      "eval_samples_per_second": 61.275,
+      "eval_steps_per_second": 61.275,
+      "step": 5632
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 9.129895079060146e-05,
+      "loss": 5510.5103,
+      "step": 5888
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 9.09206443032363e-05,
+      "loss": 5384.3877,
+      "step": 6144
+    },
+    {
+      "epoch": 0.45,
+      "eval_loss": 7380.9560546875,
+      "eval_runtime": 49.4679,
+      "eval_samples_per_second": 68.691,
+      "eval_steps_per_second": 68.691,
+      "step": 6144
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 9.054233781587114e-05,
+      "loss": 5411.5742,
+      "step": 6400
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 9.016403132850599e-05,
+      "loss": 5327.8291,
+      "step": 6656
+    },
+    {
+      "epoch": 0.49,
+      "eval_loss": 6015.3486328125,
+      "eval_runtime": 49.5302,
+      "eval_samples_per_second": 68.605,
+      "eval_steps_per_second": 68.605,
+      "step": 6656
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 8.978572484114084e-05,
+      "loss": 5498.8262,
+      "step": 6912
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 8.940741835377569e-05,
+      "loss": 5376.377,
+      "step": 7168
+    },
+    {
+      "epoch": 0.53,
+      "eval_loss": 4817.3671875,
+      "eval_runtime": 49.2566,
+      "eval_samples_per_second": 68.986,
+      "eval_steps_per_second": 68.986,
+      "step": 7168
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 8.902911186641053e-05,
+      "loss": 5066.939,
+      "step": 7424
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 8.865080537904538e-05,
+      "loss": 4955.6113,
+      "step": 7680
+    },
+    {
+      "epoch": 0.57,
+      "eval_loss": 3752.7509765625,
+      "eval_runtime": 51.0919,
+      "eval_samples_per_second": 66.508,
+      "eval_steps_per_second": 66.508,
+      "step": 7680
     }
   ],
   "logging_steps": 256,