MeedoSam committed on
Commit
26cf530
1 Parent(s): fa164d6

Uploaded checkpoint-20000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a6cdbde4f4ccad23f4f917c627b157da08e6dc847e2ed92ac14c89589880841
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d86c87b10946be5e19fd09e6c945826193408dd0c45733f14b421ff48c3d03
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4075ec4b36d7b30f8711cdc8411eb66cf7511268abac12ba6b2db316a1ee7dd7
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de3c6d36565a894febe83bdaf8decd391144d8d18bb0a4d0adc45544c47e3860
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81db4d08b4f0905e940e600b34d55e9d49b371b457fe52a1dd9b0fc61bd94779
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3788580bf499dcd38db18c9ad678a723e513e1df6a53a304a6b7c2bb74dc2674
3
+ size 14180
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a95ec91bf6b79176b87aeb9e7899423b6eea55e3aa8c4701936014715782af7e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ba0dbbe5ceaff99b526aba1b21b83c949f035061745afe93ca5ff87a34da88f
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4375,
5
  "eval_steps": 2500,
6
- "global_step": 17500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -182,6 +182,35 @@
182
  "eval_samples_per_second": 4.959,
183
  "eval_steps_per_second": 4.959,
184
  "step": 17500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  }
186
  ],
187
  "logging_steps": 1000,
@@ -189,7 +218,7 @@
189
  "num_input_tokens_seen": 0,
190
  "num_train_epochs": 1,
191
  "save_steps": 2500,
192
- "total_flos": 2.8178720489472e+17,
193
  "train_batch_size": 1,
194
  "trial_name": null,
195
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5,
5
  "eval_steps": 2500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
182
  "eval_samples_per_second": 4.959,
183
  "eval_steps_per_second": 4.959,
184
  "step": 17500
185
+ },
186
+ {
187
+ "epoch": 0.45,
188
+ "grad_norm": 3.8097546100616455,
189
+ "learning_rate": 9.85913043478261e-06,
190
+ "loss": 1.35,
191
+ "step": 18000
192
+ },
193
+ {
194
+ "epoch": 0.47,
195
+ "grad_norm": 6.639487266540527,
196
+ "learning_rate": 9.279420289855074e-06,
197
+ "loss": 1.3498,
198
+ "step": 19000
199
+ },
200
+ {
201
+ "epoch": 0.5,
202
+ "grad_norm": 4.806727886199951,
203
+ "learning_rate": 8.700289855072464e-06,
204
+ "loss": 1.3372,
205
+ "step": 20000
206
+ },
207
+ {
208
+ "epoch": 0.5,
209
+ "eval_loss": 1.3245751857757568,
210
+ "eval_runtime": 201.5593,
211
+ "eval_samples_per_second": 4.961,
212
+ "eval_steps_per_second": 4.961,
213
+ "step": 20000
214
  }
215
  ],
216
  "logging_steps": 1000,
 
218
  "num_input_tokens_seen": 0,
219
  "num_train_epochs": 1,
220
  "save_steps": 2500,
221
+ "total_flos": 3.2204251987968e+17,
222
  "train_batch_size": 1,
223
  "trial_name": null,
224
  "trial_params": null