HealthTeam committed on
Commit
e55d09d
1 Parent(s): 27ca8bc

Training in progress, step 44192

Browse files
last-checkpoint/generation_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
- "max_length": 300,
6
  "pad_token_id": 0,
7
  "transformers_version": "4.26.1"
8
  }
 
2
  "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
+ "max_length": 1024,
6
  "pad_token_id": 0,
7
  "transformers_version": "4.26.1"
8
  }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1d04c124647d72d3fc65dfa5db54a576cf67715bb9d9b3adc80890d34fcf2ec
3
  size 2401461253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735d18dafec0eb0d88dc8f14c6729fbeaedd8e691030fc2db5b2412727e9aee7
3
  size 2401461253
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46eba11efcb2f8397d7585a4be3b76d5c6a48fe6fa0c51c3fd0f692262a0214c
3
  size 1200739717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34ff48fe0ad9782c0ff82a7ab7cbb2a6863c809cb138abf98a7aebec9da7688d
3
  size 1200739717
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:469b203b30517b41378464133ae500974ade0c5b2d4180972b0a524ae758b0fe
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9985ba16d61e95c50d1b9cec08c60d6a3a483569fabd9a37aabc0a1c5f05bc9b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a14d07d4b77ce7ca47e32ae34b2ecdda9856c350be4e59018afeb5af45fac27
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932a3fd2a2c6fac0cf60b74f87ec91fc889b1355ae07661a0816ca1fa6dd613f
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9861057391925263,
5
- "global_step": 33144,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -402,11 +402,152 @@
402
  "learning_rate": 1.3454523816607659e-05,
403
  "loss": 2.4452,
404
  "step": 33000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
406
  ],
407
  "max_steps": 100833,
408
  "num_train_epochs": 3,
409
- "total_flos": 8.921229210825523e+16,
410
  "trial_name": null,
411
  "trial_params": null
412
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3148076522567016,
5
+ "global_step": 44192,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
402
  "learning_rate": 1.3454523816607659e-05,
403
  "loss": 2.4452,
404
  "step": 33000
405
+ },
406
+ {
407
+ "epoch": 1.0,
408
+ "learning_rate": 1.3355349935041108e-05,
409
+ "loss": 2.4438,
410
+ "step": 33500
411
+ },
412
+ {
413
+ "epoch": 1.0,
414
+ "eval_bleu": 16.655484084216944,
415
+ "eval_loss": 2.04453706741333,
416
+ "eval_runtime": 4558.2525,
417
+ "eval_samples_per_second": 8.87,
418
+ "eval_steps_per_second": 0.277,
419
+ "step": 33611
420
+ },
421
+ {
422
+ "epoch": 1.01,
423
+ "learning_rate": 1.3256176053474558e-05,
424
+ "loss": 2.44,
425
+ "step": 34000
426
+ },
427
+ {
428
+ "epoch": 1.03,
429
+ "learning_rate": 1.3157002171908007e-05,
430
+ "loss": 2.4188,
431
+ "step": 34500
432
+ },
433
+ {
434
+ "epoch": 1.04,
435
+ "learning_rate": 1.3057828290341456e-05,
436
+ "loss": 2.4264,
437
+ "step": 35000
438
+ },
439
+ {
440
+ "epoch": 1.06,
441
+ "learning_rate": 1.2958654408774906e-05,
442
+ "loss": 2.4337,
443
+ "step": 35500
444
+ },
445
+ {
446
+ "epoch": 1.07,
447
+ "learning_rate": 1.2859480527208354e-05,
448
+ "loss": 2.433,
449
+ "step": 36000
450
+ },
451
+ {
452
+ "epoch": 1.09,
453
+ "learning_rate": 1.2760306645641804e-05,
454
+ "loss": 2.4277,
455
+ "step": 36500
456
+ },
457
+ {
458
+ "epoch": 1.1,
459
+ "learning_rate": 1.2661132764075254e-05,
460
+ "loss": 2.43,
461
+ "step": 37000
462
+ },
463
+ {
464
+ "epoch": 1.12,
465
+ "learning_rate": 1.2561958882508702e-05,
466
+ "loss": 2.4343,
467
+ "step": 37500
468
+ },
469
+ {
470
+ "epoch": 1.13,
471
+ "learning_rate": 1.2462785000942152e-05,
472
+ "loss": 2.4278,
473
+ "step": 38000
474
+ },
475
+ {
476
+ "epoch": 1.15,
477
+ "learning_rate": 1.2363611119375602e-05,
478
+ "loss": 2.4462,
479
+ "step": 38500
480
+ },
481
+ {
482
+ "epoch": 1.16,
483
+ "learning_rate": 1.2264437237809052e-05,
484
+ "loss": 2.4115,
485
+ "step": 39000
486
+ },
487
+ {
488
+ "epoch": 1.18,
489
+ "learning_rate": 1.21652633562425e-05,
490
+ "loss": 2.426,
491
+ "step": 39500
492
+ },
493
+ {
494
+ "epoch": 1.19,
495
+ "learning_rate": 1.206608947467595e-05,
496
+ "loss": 2.4384,
497
+ "step": 40000
498
+ },
499
+ {
500
+ "epoch": 1.2,
501
+ "learning_rate": 1.19669155931094e-05,
502
+ "loss": 2.418,
503
+ "step": 40500
504
+ },
505
+ {
506
+ "epoch": 1.22,
507
+ "learning_rate": 1.1867741711542848e-05,
508
+ "loss": 2.4409,
509
+ "step": 41000
510
+ },
511
+ {
512
+ "epoch": 1.23,
513
+ "learning_rate": 1.1768567829976298e-05,
514
+ "loss": 2.4147,
515
+ "step": 41500
516
+ },
517
+ {
518
+ "epoch": 1.25,
519
+ "learning_rate": 1.1669393948409748e-05,
520
+ "loss": 2.4325,
521
+ "step": 42000
522
+ },
523
+ {
524
+ "epoch": 1.26,
525
+ "learning_rate": 1.1570220066843196e-05,
526
+ "loss": 2.4317,
527
+ "step": 42500
528
+ },
529
+ {
530
+ "epoch": 1.28,
531
+ "learning_rate": 1.1471046185276646e-05,
532
+ "loss": 2.4253,
533
+ "step": 43000
534
+ },
535
+ {
536
+ "epoch": 1.29,
537
+ "learning_rate": 1.1371872303710096e-05,
538
+ "loss": 2.4249,
539
+ "step": 43500
540
+ },
541
+ {
542
+ "epoch": 1.31,
543
+ "learning_rate": 1.1272698422143544e-05,
544
+ "loss": 2.4322,
545
+ "step": 44000
546
  }
547
  ],
548
  "max_steps": 100833,
549
  "num_train_epochs": 3,
550
+ "total_flos": 1.1901350172465562e+17,
551
  "trial_name": null,
552
  "trial_params": null
553
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46eba11efcb2f8397d7585a4be3b76d5c6a48fe6fa0c51c3fd0f692262a0214c
3
  size 1200739717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34ff48fe0ad9782c0ff82a7ab7cbb2a6863c809cb138abf98a7aebec9da7688d
3
  size 1200739717
runs/Feb14_18-29-07_39730b194efc/events.out.tfevents.1676399391.39730b194efc.3664.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56e2e8efcb7cc8ee42e9a2730685187108f9f744f485a7c006151d8a13539954
3
- size 14827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d9dfbf22943453ae081381839560fdfbdba7c8a881a29e0a7f925e30d18ae48
3
+ size 18672