ncbateman commited on
Commit
d7aa23b
·
verified ·
1 Parent(s): 176e709

Training in progress, step 535, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f9aed1f2401361ea2ec2e007476a61dfde89a92cbfa844aa250aa8aae479b9
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173c968110406154bbd6958e0575f0041084fe35c92872803251c373142f7599
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45205e8e1552e7490a9b7109bcece0f8aed45b92c38d969d378e0ccf54848cae
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731ab2eb7506e7b8fba727ae9e889e4883eba19e71ccb1aee37d0543327dcf26
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4561e3ee2c17cf50619bdcd83396129cd0378f1bf28e100d14054fa49d2339e3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9ee5b1533ef89e38b0a831d8e4b8b744f0cbca34e3e202f7d262ca1861c412
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cbba19fa588799a647ab10e3adc360f543d9651c93cfcae6eb523c65fdc2328
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f6265b720d778f2ce309335230d277e55711db968e54ca2d8c342eedbbfb57
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6858621805241022,
5
  "eval_steps": 386,
6
- "global_step": 530,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3733,6 +3733,41 @@
3733
  "learning_rate": 9.613459075424034e-05,
3734
  "loss": 0.8122,
3735
  "step": 530
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3736
  }
3737
  ],
3738
  "logging_steps": 1,
@@ -3752,7 +3787,7 @@
3752
  "attributes": {}
3753
  }
3754
  },
3755
- "total_flos": 5.92506700997591e+17,
3756
  "train_batch_size": 4,
3757
  "trial_name": null,
3758
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6923325784535749,
5
  "eval_steps": 386,
6
+ "global_step": 535,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3733
  "learning_rate": 9.613459075424034e-05,
3734
  "loss": 0.8122,
3735
  "step": 530
3736
+ },
3737
+ {
3738
+ "epoch": 0.6871562601099968,
3739
+ "grad_norm": 0.7333494424819946,
3740
+ "learning_rate": 9.611868000118452e-05,
3741
+ "loss": 0.8027,
3742
+ "step": 531
3743
+ },
3744
+ {
3745
+ "epoch": 0.6884503396958913,
3746
+ "grad_norm": 0.7772257924079895,
3747
+ "learning_rate": 9.61027378916968e-05,
3748
+ "loss": 0.8538,
3749
+ "step": 532
3750
+ },
3751
+ {
3752
+ "epoch": 0.6897444192817859,
3753
+ "grad_norm": 0.7605924606323242,
3754
+ "learning_rate": 9.60867644366163e-05,
3755
+ "loss": 0.875,
3756
+ "step": 533
3757
+ },
3758
+ {
3759
+ "epoch": 0.6910384988676803,
3760
+ "grad_norm": 0.8444223999977112,
3761
+ "learning_rate": 9.607075964680352e-05,
3762
+ "loss": 1.0179,
3763
+ "step": 534
3764
+ },
3765
+ {
3766
+ "epoch": 0.6923325784535749,
3767
+ "grad_norm": 0.7386454939842224,
3768
+ "learning_rate": 9.605472353314023e-05,
3769
+ "loss": 0.9023,
3770
+ "step": 535
3771
  }
3772
  ],
3773
  "logging_steps": 1,
 
3787
  "attributes": {}
3788
  }
3789
  },
3790
+ "total_flos": 5.980963868560589e+17,
3791
  "train_batch_size": 4,
3792
  "trial_name": null,
3793
  "trial_params": null