besimray commited on
Commit
4fff76e
·
verified ·
1 Parent(s): 1a4db54

Training in progress, step 25, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:791c778d50e613af2b99c77aca642cb9aee7567880abcf258132c18df775d086
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbeb203f46fb1c1ec8f6e277a8fabff750773722580b461a96e1cbac96a2291f
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cb76dc8f7221ff9781f9474a0e568f828e3700c7b08284ec4b9c505e19679e9
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23e132835a2fd7e3ed3d5cfe045d84119b7967d8c3a6685bd84957e36e914460
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:558b680d20c8a456dc87962293f6d283ceca35e3120c8340321f6e2afcd6b25f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ec2f1de877992cebf6fbce0e472b6b0ae06bf82cd1caccb4ee85e3a5063b21
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c297c5cf11a27c75d9f99f1df69752f78c3ad41b0275adf50cdd1b67f9d0bb3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3295bdd7cfd599ec7f00b8b43fe7ebc7026edcd2d9d9a208dde1fb88ec2e55ef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.42105263157894735,
5
  "eval_steps": 8,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -171,6 +171,49 @@
171
  "learning_rate": 0.00019749279121818235,
172
  "loss": 1.3514,
173
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
  ],
176
  "logging_steps": 1,
@@ -190,7 +233,7 @@
190
  "attributes": {}
191
  }
192
  },
193
- "total_flos": 2033729462599680.0,
194
  "train_batch_size": 10,
195
  "trial_name": null,
196
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5263157894736842,
5
  "eval_steps": 8,
6
+ "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
171
  "learning_rate": 0.00019749279121818235,
172
  "loss": 1.3514,
173
  "step": 20
174
+ },
175
+ {
176
+ "epoch": 0.4421052631578947,
177
+ "grad_norm": 0.398816853761673,
178
+ "learning_rate": 0.0001969689520376687,
179
+ "loss": 1.0967,
180
+ "step": 21
181
+ },
182
+ {
183
+ "epoch": 0.4631578947368421,
184
+ "grad_norm": 0.30981966853141785,
185
+ "learning_rate": 0.00019639628606958533,
186
+ "loss": 1.1732,
187
+ "step": 22
188
+ },
189
+ {
190
+ "epoch": 0.4842105263157895,
191
+ "grad_norm": 0.33735471963882446,
192
+ "learning_rate": 0.00019577508166849304,
193
+ "loss": 1.2267,
194
+ "step": 23
195
+ },
196
+ {
197
+ "epoch": 0.5052631578947369,
198
+ "grad_norm": 0.4743439555168152,
199
+ "learning_rate": 0.00019510565162951537,
200
+ "loss": 1.2174,
201
+ "step": 24
202
+ },
203
+ {
204
+ "epoch": 0.5052631578947369,
205
+ "eval_loss": 1.1610326766967773,
206
+ "eval_runtime": 2.0948,
207
+ "eval_samples_per_second": 47.737,
208
+ "eval_steps_per_second": 4.774,
209
+ "step": 24
210
+ },
211
+ {
212
+ "epoch": 0.5263157894736842,
213
+ "grad_norm": 0.3302611708641052,
214
+ "learning_rate": 0.00019438833303083678,
215
+ "loss": 1.199,
216
+ "step": 25
217
  }
218
  ],
219
  "logging_steps": 1,
 
233
  "attributes": {}
234
  }
235
  },
236
+ "total_flos": 2528931246243840.0,
237
  "train_batch_size": 10,
238
  "trial_name": null,
239
  "trial_params": null