stefania-radu commited on
Commit
63bd7a2
1 Parent(s): c45440c

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:144ab11f29aad5086eb7e021e4229dc776bde464bdeb68571c460231f7e3d4db
3
  size 893440890
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6e0063168d01e8d9ad637627df35c664793162761ded8ac5e14cee0a39fa4b
3
  size 893440890
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79ba4cc15f1aa64c4904af07ad626b329074ee800b722742cb772b5f3107448b
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcf9f6920ad5ded398eb8f93116f71e8adc7c3a91f6cd5c0da8e0f651bc8faf
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d24823f9423b69ce75c9be00b967bc60f7fb6bd2d410d5fef23d8bda17dae11b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:832b02611c12b9ad0dcde16a2bc02988c4f2b0b885960cfa4b9125a284bdeb99
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7511346d6e23d46db8df4597671861c1d4ab566dbbf2c42030f6ce7dfc2c854d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cfba27d7b6929b849933e9ae7d0e075a396ddca1f7f2a518e796268cde14302
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.04,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -302,11 +302,85 @@
302
  "eval_samples_per_second": 111.653,
303
  "eval_steps_per_second": 13.957,
304
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  }
306
  ],
307
  "max_steps": 1000000,
308
  "num_train_epochs": 9223372036854775807,
309
- "total_flos": 3.5418440692924416e+20,
310
  "trial_name": null,
311
  "trial_params": null
312
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
302
  "eval_samples_per_second": 111.653,
303
  "eval_steps_per_second": 13.957,
304
  "step": 40000
305
+ },
306
+ {
307
+ "epoch": 0.04,
308
+ "learning_rate": 1.5374999999999998e-05,
309
+ "loss": 0.5352,
310
+ "step": 41000
311
+ },
312
+ {
313
+ "epoch": 0.04,
314
+ "learning_rate": 1.5749999999999997e-05,
315
+ "loss": 0.5329,
316
+ "step": 42000
317
+ },
318
+ {
319
+ "epoch": 0.04,
320
+ "learning_rate": 1.6125e-05,
321
+ "loss": 0.5304,
322
+ "step": 43000
323
+ },
324
+ {
325
+ "epoch": 0.04,
326
+ "learning_rate": 1.6499999999999998e-05,
327
+ "loss": 0.5232,
328
+ "step": 44000
329
+ },
330
+ {
331
+ "epoch": 0.04,
332
+ "learning_rate": 1.6875e-05,
333
+ "loss": 0.5164,
334
+ "step": 45000
335
+ },
336
+ {
337
+ "epoch": 0.04,
338
+ "eval_runtime": 3334.4968,
339
+ "eval_samples_per_second": 101.86,
340
+ "eval_steps_per_second": 12.733,
341
+ "step": 45000
342
+ },
343
+ {
344
+ "epoch": 0.05,
345
+ "learning_rate": 1.725e-05,
346
+ "loss": 0.5133,
347
+ "step": 46000
348
+ },
349
+ {
350
+ "epoch": 0.05,
351
+ "learning_rate": 1.7624999999999998e-05,
352
+ "loss": 0.5104,
353
+ "step": 47000
354
+ },
355
+ {
356
+ "epoch": 0.05,
357
+ "learning_rate": 1.7999999999999997e-05,
358
+ "loss": 0.507,
359
+ "step": 48000
360
+ },
361
+ {
362
+ "epoch": 0.05,
363
+ "learning_rate": 1.8375e-05,
364
+ "loss": 0.5034,
365
+ "step": 49000
366
+ },
367
+ {
368
+ "epoch": 0.05,
369
+ "learning_rate": 1.875e-05,
370
+ "loss": 0.5007,
371
+ "step": 50000
372
+ },
373
+ {
374
+ "epoch": 0.05,
375
+ "eval_runtime": 3361.5856,
376
+ "eval_samples_per_second": 101.039,
377
+ "eval_steps_per_second": 12.63,
378
+ "step": 50000
379
  }
380
  ],
381
  "max_steps": 1000000,
382
  "num_train_epochs": 9223372036854775807,
383
+ "total_flos": 4.427305086615552e+20,
384
  "trial_name": null,
385
  "trial_params": null
386
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79ba4cc15f1aa64c4904af07ad626b329074ee800b722742cb772b5f3107448b
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcf9f6920ad5ded398eb8f93116f71e8adc7c3a91f6cd5c0da8e0f651bc8faf
3
  size 454197066