ribesstefano committed
Commit afb5b0d · verified · 1 Parent(s): f8346ce

Training in progress, step 25000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67db7fb5753287fec24d570fb9e463675efaa498fa6d43a9b9c5b7d5276de37e
+oid sha256:d464023b8f1474910766ee80ad14a28e66f63573ca2b6fb03947ac68b967bb41
 size 409608164
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:885da78fc800f2d8b7fb905e4b00218a8293bb01d93c369ccba96ed84cdd299b
+oid sha256:d2d33601ea1dc880400999b7420e298be3e4cbc04c3e87157fb01b29ee13561d
 size 814647162
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6129e57ccde2e74721074ef039aefd879898938d0c6bc6383123ae34cb563028
+oid sha256:13af011930134dbe96e128214e5fbcde3960d4f80e067bf43289611e2726c1ea
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62ab762d9f0d3d15a42cef5723b0f5f86203957cf86c4aa9da4491b50bed6eb5
+oid sha256:01c6651fb3b50890a831e967efdfa4694c1d4e9a2df9e75a1dc5c40d85362105
 size 1064
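
Note: all four binary files above (model weights, optimizer state, RNG state, scheduler) are tracked with Git LFS, so the commit only rewrites their small pointer files: "version" names the LFS spec, "oid" is the SHA-256 digest of the stored object, and "size" is its length in bytes. As a minimal sketch (not part of this repository; file paths are illustrative), a locally downloaded object can be checked against its pointer like this:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    # A Git LFS pointer is a short key/value text file: version, oid, size.
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_lfs_object(pointer_path: str, object_path: str) -> bool:
    # Compare a downloaded file against the oid/size recorded in its pointer.
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(object_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Hypothetical usage:
# verify_lfs_object("model.safetensors.pointer", "last-checkpoint/model.safetensors")
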
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5477160056657224,
   "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine-opt25/checkpoint-10000",
-  "epoch": 1.9727756954034326,
+  "epoch": 2.465969619254291,
   "eval_steps": 2500,
-  "global_step": 20000,
+  "global_step": 25000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -413,6 +413,76 @@
       "learning_rate": 4.52623294202573e-05,
       "loss": 0.0017,
       "step": 20000
+    },
+    {
+      "epoch": 2.0220950877885184,
+      "grad_norm": 0.047866348177194595,
+      "learning_rate": 4.502958120260894e-05,
+      "loss": 0.0017,
+      "step": 20500
+    },
+    {
+      "epoch": 2.0714144801736043,
+      "grad_norm": 0.053165681660175323,
+      "learning_rate": 4.479188108686714e-05,
+      "loss": 0.0019,
+      "step": 21000
+    },
+    {
+      "epoch": 2.12073387255869,
+      "grad_norm": 0.04063253104686737,
+      "learning_rate": 4.4549287839450324e-05,
+      "loss": 0.0017,
+      "step": 21500
+    },
+    {
+      "epoch": 2.170053264943776,
+      "grad_norm": 0.059268295764923096,
+      "learning_rate": 4.4301861436502156e-05,
+      "loss": 0.0015,
+      "step": 22000
+    },
+    {
+      "epoch": 2.219372657328862,
+      "grad_norm": 0.021407226100564003,
+      "learning_rate": 4.404966304906363e-05,
+      "loss": 0.0014,
+      "step": 22500
+    },
+    {
+      "epoch": 2.2686920497139473,
+      "grad_norm": 0.027945173904299736,
+      "learning_rate": 4.379275502794983e-05,
+      "loss": 0.0014,
+      "step": 23000
+    },
+    {
+      "epoch": 2.318011442099033,
+      "grad_norm": 0.03261112794280052,
+      "learning_rate": 4.353120088833501e-05,
+      "loss": 0.0014,
+      "step": 23500
+    },
+    {
+      "epoch": 2.367330834484119,
+      "grad_norm": 0.05259308964014053,
+      "learning_rate": 4.326506529404972e-05,
+      "loss": 0.0013,
+      "step": 24000
+    },
+    {
+      "epoch": 2.416650226869205,
+      "grad_norm": 0.0584435798227787,
+      "learning_rate": 4.2994959806435226e-05,
+      "loss": 0.0015,
+      "step": 24500
+    },
+    {
+      "epoch": 2.465969619254291,
+      "grad_norm": 0.022548576816916466,
+      "learning_rate": 4.2719868638689734e-05,
+      "loss": 0.0012,
+      "step": 25000
     }
   ],
   "logging_steps": 500,
@@ -432,7 +502,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7329296562502707e+17,
+  "total_flos": 2.1661693972804915e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null