rootxhacker commited on
Commit
28f0278
·
verified ·
1 Parent(s): 14c23cf

Training in progress, step 13500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:211303fda3172e98417fefcf4ae565442875c8e0001f83fa9b613f42eb68e416
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0fec3c5b1e7e374b9bebda92218a1bdc7fadf77f9f2fd358d7738c92849ab15
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bf6caeca7f6bd6acaeee1391b627a2e314559d2e523a534ad583a26de79d19e
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:961d4f3f88c2859e92add0976bc93ab8db0649cc3da6695526e297d735998366
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf8948534182fde8a4da31776a53cc78337e1e18ed4b526e2ee8ca99eff5731
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81ec269cbd23a3955804ddaef963a9e0d68f2087a109e239da5baed50032493e
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:501e0348ebe2e7da1549a0a032d61d9c5a8a3fbf8f3846e0f360ce065e6e05b7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:861918ef9bae87f3c0647e76fd5a519763415c2dba1ede7121af8dc80bfb456a
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da01fa338433614d7203b84552679acf4a96484a8201c92d575f7a8bdc7e698e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483cd8caa2794b8eb45fe09567d024ef71ee8ff6f6ba358a1febd749509b4c7e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 13000,
3
  "best_metric": 1.5033278465270996,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
- "epoch": 0.9999230828397816,
6
  "eval_steps": 250,
7
- "global_step": 13000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2244,6 +2244,92 @@
2244
  "eval_samples_per_second": 54.978,
2245
  "eval_steps_per_second": 13.745,
2246
  "step": 13000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2247
  }
2248
  ],
2249
  "logging_steps": 50,
 
2
  "best_global_step": 13000,
3
  "best_metric": 1.5033278465270996,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
+ "epoch": 1.038381662949004,
6
  "eval_steps": 250,
7
+ "global_step": 13500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2244
  "eval_samples_per_second": 54.978,
2245
  "eval_steps_per_second": 13.745,
2246
  "step": 13000
2247
+ },
2248
+ {
2249
+ "epoch": 1.0037689408507038,
2250
+ "grad_norm": 1.291033387184143,
2251
+ "learning_rate": 7.340465424065291e-05,
2252
+ "loss": 1.4455,
2253
+ "step": 13050
2254
+ },
2255
+ {
2256
+ "epoch": 1.007614798861626,
2257
+ "grad_norm": 1.247129201889038,
2258
+ "learning_rate": 7.31473107606338e-05,
2259
+ "loss": 1.4629,
2260
+ "step": 13100
2261
+ },
2262
+ {
2263
+ "epoch": 1.0114606568725482,
2264
+ "grad_norm": 1.2177772521972656,
2265
+ "learning_rate": 7.288996728061467e-05,
2266
+ "loss": 1.5715,
2267
+ "step": 13150
2268
+ },
2269
+ {
2270
+ "epoch": 1.0153065148834706,
2271
+ "grad_norm": 1.2471716403961182,
2272
+ "learning_rate": 7.263262380059556e-05,
2273
+ "loss": 1.4244,
2274
+ "step": 13200
2275
+ },
2276
+ {
2277
+ "epoch": 1.0191523728943928,
2278
+ "grad_norm": 0.8932450413703918,
2279
+ "learning_rate": 7.237528032057644e-05,
2280
+ "loss": 1.4278,
2281
+ "step": 13250
2282
+ },
2283
+ {
2284
+ "epoch": 1.0191523728943928,
2285
+ "eval_loss": 1.5201970338821411,
2286
+ "eval_runtime": 17.9356,
2287
+ "eval_samples_per_second": 55.755,
2288
+ "eval_steps_per_second": 13.939,
2289
+ "step": 13250
2290
+ },
2291
+ {
2292
+ "epoch": 1.022998230905315,
2293
+ "grad_norm": 1.9957834482192993,
2294
+ "learning_rate": 7.211793684055732e-05,
2295
+ "loss": 1.5017,
2296
+ "step": 13300
2297
+ },
2298
+ {
2299
+ "epoch": 1.0268440889162371,
2300
+ "grad_norm": 1.432619571685791,
2301
+ "learning_rate": 7.186059336053821e-05,
2302
+ "loss": 1.4271,
2303
+ "step": 13350
2304
+ },
2305
+ {
2306
+ "epoch": 1.0306899469271595,
2307
+ "grad_norm": 1.3298619985580444,
2308
+ "learning_rate": 7.16032498805191e-05,
2309
+ "loss": 1.5726,
2310
+ "step": 13400
2311
+ },
2312
+ {
2313
+ "epoch": 1.0345358049380817,
2314
+ "grad_norm": 10.102746963500977,
2315
+ "learning_rate": 7.134590640049997e-05,
2316
+ "loss": 1.3938,
2317
+ "step": 13450
2318
+ },
2319
+ {
2320
+ "epoch": 1.038381662949004,
2321
+ "grad_norm": 1.9288721084594727,
2322
+ "learning_rate": 7.108856292048085e-05,
2323
+ "loss": 1.4264,
2324
+ "step": 13500
2325
+ },
2326
+ {
2327
+ "epoch": 1.038381662949004,
2328
+ "eval_loss": 1.5168194770812988,
2329
+ "eval_runtime": 18.139,
2330
+ "eval_samples_per_second": 55.13,
2331
+ "eval_steps_per_second": 13.782,
2332
+ "step": 13500
2333
  }
2334
  ],
2335
  "logging_steps": 50,