marinone94 committed on
Commit
c756cf3
•
1 Parent(s): fdf3f3d

Training in progress, step 800

Browse files
{checkpoint-500 → checkpoint-800}/config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-800}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4f451d7dca685dc733896a3b9dde8e1707a70872842fb140a7b1b2bc09dc86e
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3228c2d654fd3d25a0ad00a301f5ce6f4265d0cc4c922a869f44a37a2b0f0f2
3
  size 2490337809
{checkpoint-500 → checkpoint-800}/preprocessor_config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-800}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee400442c70fe28b4032830f08f4c31605f74d5b5778895b5f30b7cad432bdcd
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f2151485570eebb9c2c7ead79b5dbb947c911745136ec08aec4531150fc59d
3
  size 1262063089
{checkpoint-500 → checkpoint-800}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60bd99fb8f131eb1ec63d9b9eee304bd3aa49c3b43a3a12613b8de4c1d42c4c6
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:191ab0b0b7f850bd200dd0e9733735c31b1e7d63b3150165c4de6c4a12c5ef5e
3
  size 14567
{checkpoint-500 → checkpoint-800}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e
3
  size 559
{checkpoint-500 → checkpoint-800}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:536ace7f76d669d6713c8de85eb8de0ed71bdc66a4ba89707e46295a79ac66a8
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595ce5051ecea72321f0a4e15d7e1d59293398355f90dbde31fcccb29f2b4f95
3
  size 623
{checkpoint-500 → checkpoint-800}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.491803278688525,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -201,11 +201,128 @@
201
  "eval_steps_per_second": 0.796,
202
  "eval_wer": 1.0,
203
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  }
205
  ],
206
  "max_steps": 4550,
207
  "num_train_epochs": 50,
208
- "total_flos": 7.741689365780442e+18,
209
  "trial_name": null,
210
  "trial_params": null
211
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.78688524590164,
5
+ "global_step": 800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
201
  "eval_steps_per_second": 0.796,
202
  "eval_wer": 1.0,
203
  "step": 500
204
+ },
205
+ {
206
+ "epoch": 5.71,
207
+ "learning_rate": 6.500000000000001e-05,
208
+ "loss": 3.0088,
209
+ "step": 520
210
+ },
211
+ {
212
+ "epoch": 5.93,
213
+ "learning_rate": 6.75e-05,
214
+ "loss": 3.0051,
215
+ "step": 540
216
+ },
217
+ {
218
+ "epoch": 6.15,
219
+ "learning_rate": 7.000000000000001e-05,
220
+ "loss": 3.073,
221
+ "step": 560
222
+ },
223
+ {
224
+ "epoch": 6.37,
225
+ "learning_rate": 7.25e-05,
226
+ "loss": 3.0031,
227
+ "step": 580
228
+ },
229
+ {
230
+ "epoch": 6.59,
231
+ "learning_rate": 7.5e-05,
232
+ "loss": 2.9964,
233
+ "step": 600
234
+ },
235
+ {
236
+ "epoch": 6.59,
237
+ "eval_loss": 2.998962640762329,
238
+ "eval_runtime": 193.5213,
239
+ "eval_samples_per_second": 25.026,
240
+ "eval_steps_per_second": 0.785,
241
+ "eval_wer": 1.0,
242
+ "step": 600
243
+ },
244
+ {
245
+ "epoch": 6.81,
246
+ "learning_rate": 7.75e-05,
247
+ "loss": 2.9921,
248
+ "step": 620
249
+ },
250
+ {
251
+ "epoch": 7.03,
252
+ "learning_rate": 8e-05,
253
+ "loss": 3.0665,
254
+ "step": 640
255
+ },
256
+ {
257
+ "epoch": 7.25,
258
+ "learning_rate": 8.25e-05,
259
+ "loss": 2.9826,
260
+ "step": 660
261
+ },
262
+ {
263
+ "epoch": 7.47,
264
+ "learning_rate": 8.5e-05,
265
+ "loss": 2.9689,
266
+ "step": 680
267
+ },
268
+ {
269
+ "epoch": 7.69,
270
+ "learning_rate": 8.75e-05,
271
+ "loss": 2.9602,
272
+ "step": 700
273
+ },
274
+ {
275
+ "epoch": 7.69,
276
+ "eval_loss": 2.9620397090911865,
277
+ "eval_runtime": 193.5851,
278
+ "eval_samples_per_second": 25.017,
279
+ "eval_steps_per_second": 0.785,
280
+ "eval_wer": 1.0,
281
+ "step": 700
282
+ },
283
+ {
284
+ "epoch": 7.91,
285
+ "learning_rate": 8.999999999999999e-05,
286
+ "loss": 2.9639,
287
+ "step": 720
288
+ },
289
+ {
290
+ "epoch": 8.13,
291
+ "learning_rate": 9.25e-05,
292
+ "loss": 3.0215,
293
+ "step": 740
294
+ },
295
+ {
296
+ "epoch": 8.35,
297
+ "learning_rate": 9.5e-05,
298
+ "loss": 2.9454,
299
+ "step": 760
300
+ },
301
+ {
302
+ "epoch": 8.57,
303
+ "learning_rate": 9.750000000000001e-05,
304
+ "loss": 2.9239,
305
+ "step": 780
306
+ },
307
+ {
308
+ "epoch": 8.79,
309
+ "learning_rate": 0.0001,
310
+ "loss": 2.8756,
311
+ "step": 800
312
+ },
313
+ {
314
+ "epoch": 8.79,
315
+ "eval_loss": 2.7302000522613525,
316
+ "eval_runtime": 191.8065,
317
+ "eval_samples_per_second": 25.249,
318
+ "eval_steps_per_second": 0.792,
319
+ "eval_wer": 1.0,
320
+ "step": 800
321
  }
322
  ],
323
  "max_steps": 4550,
324
  "num_train_epochs": 50,
325
+ "total_flos": 1.2402928809554872e+19,
326
  "trial_name": null,
327
  "trial_params": null
328
  }
{checkpoint-500 → checkpoint-800}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:866e0246bd78706b1bfb6c07818d34662e459927e54718460f8a950f5fc33ae8
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f2151485570eebb9c2c7ead79b5dbb947c911745136ec08aec4531150fc59d
3
  size 1262063089