bhuvanmdev commited on
Commit
e5d0935
·
verified ·
1 Parent(s): e569efd

Training in progress, step 2680, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ac4c2e27c7954ba8f32458e86bce305764577f448e1ef7b05942fdf1638b8d3
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8edf825d7e2ed21d2d16d8a88071fe27f783e5456eb6e2f0ae6c8dfef2d8ed1
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:281f3e1e3de94056c7322594abe70311fe1bd96ae510bce52c2cda2200510403
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a3fd810244bbb9806c8d080b1a9cfc225f3cbfd1200f5a73aef5ff9d1728ffc
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61c76e152102faadd3e0723a7b1e98668fc77dc3b57498a40931f275e4f17e53
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0041b61de0cc7e619ce44504c6c5477bc99a6c82547e7c9dd6815272ffe05c3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae0efadb0abbc637034bafcf665143567ca2a6d58001c0ddda8fd674ac11d7db
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:436b183fda379ac02abfc2e4c2b3ebe9640c6cb93100008fe5409c8282dc8c08
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9516994633273703,
5
  "eval_steps": 500,
6
- "global_step": 2660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2135,14 +2135,30 @@
2135
  "loss": 0.376,
2136
  "num_input_tokens_seen": 1804337,
2137
  "step": 2660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2138
  }
2139
  ],
2140
  "logging_steps": 10,
2141
  "max_steps": 2795,
2142
- "num_input_tokens_seen": 1804337,
2143
  "num_train_epochs": 1,
2144
  "save_steps": 20,
2145
- "total_flos": 4.05731673087529e+16,
2146
  "train_batch_size": 1,
2147
  "trial_name": null,
2148
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9588550983899821,
5
  "eval_steps": 500,
6
+ "global_step": 2680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2135
  "loss": 0.376,
2136
  "num_input_tokens_seen": 1804337,
2137
  "step": 2660
2138
+ },
2139
+ {
2140
+ "epoch": 0.9552772808586762,
2141
+ "grad_norm": 0.32699060440063477,
2142
+ "learning_rate": 8.944543828264759e-06,
2143
+ "loss": 0.4,
2144
+ "num_input_tokens_seen": 1811353,
2145
+ "step": 2670
2146
+ },
2147
+ {
2148
+ "epoch": 0.9588550983899821,
2149
+ "grad_norm": 0.37904760241508484,
2150
+ "learning_rate": 8.228980322003579e-06,
2151
+ "loss": 0.396,
2152
+ "num_input_tokens_seen": 1817606,
2153
+ "step": 2680
2154
  }
2155
  ],
2156
  "logging_steps": 10,
2157
  "max_steps": 2795,
2158
+ "num_input_tokens_seen": 1817606,
2159
  "num_train_epochs": 1,
2160
  "save_steps": 20,
2161
+ "total_flos": 4.087154026071245e+16,
2162
  "train_batch_size": 1,
2163
  "trial_name": null,
2164
  "trial_params": null