jflotz commited on
Commit
3cd6a82
1 Parent(s): fdc51a1

Training in progress, step 980000

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
 
1
  {
2
+ "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-970000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
 
1
  {
2
+ "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-970000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de1b3977166c4e20fc41f424497409f61e5dbee702d8ad14048093e7cfab3225
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa6e21575dd459731b96c75fb2eff44427788a2b21e2cba9f9983669023c697a
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebc5b0eb8c3bb3dd69db934b29e5beccddfae1a796cf948830a8b05766d44497
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af3eb0d3db8162f6de4427ee5f19b1787f4bdb865e0ebda13f4fed6034a8890
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1f60f9446cba0320cf9ced93c4b14816af8d6988d011f7cc2f5b01e8ada101d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.461148968916945,
5
- "global_step": 970000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -19406,11 +19406,211 @@
19406
  "eval_samples_per_second": 884.178,
19407
  "eval_steps_per_second": 13.857,
19408
  "step": 970000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19409
  }
19410
  ],
19411
  "max_steps": 1000000,
19412
  "num_train_epochs": 12,
19413
- "total_flos": 6.799670555468517e+22,
19414
  "trial_name": null,
19415
  "trial_params": null
19416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.929814973846515,
5
+ "global_step": 980000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
19406
  "eval_samples_per_second": 884.178,
19407
  "eval_steps_per_second": 13.857,
19408
  "step": 970000
19409
+ },
19410
+ {
19411
+ "epoch": 10.82,
19412
+ "learning_rate": 1.0332828229586692e-05,
19413
+ "loss": 0.1799,
19414
+ "step": 970500
19415
+ },
19416
+ {
19417
+ "epoch": 10.83,
19418
+ "learning_rate": 1.032165010471157e-05,
19419
+ "loss": 0.1796,
19420
+ "step": 971000
19421
+ },
19422
+ {
19423
+ "epoch": 10.83,
19424
+ "eval_loss": 0.17119638621807098,
19425
+ "eval_runtime": 2.5911,
19426
+ "eval_samples_per_second": 886.512,
19427
+ "eval_steps_per_second": 13.894,
19428
+ "step": 971000
19429
+ },
19430
+ {
19431
+ "epoch": 10.84,
19432
+ "learning_rate": 1.0310662477784401e-05,
19433
+ "loss": 0.1804,
19434
+ "step": 971500
19435
+ },
19436
+ {
19437
+ "epoch": 10.84,
19438
+ "learning_rate": 1.0299865378844936e-05,
19439
+ "loss": 0.1798,
19440
+ "step": 972000
19441
+ },
19442
+ {
19443
+ "epoch": 10.84,
19444
+ "eval_loss": 0.1710081547498703,
19445
+ "eval_runtime": 2.5437,
19446
+ "eval_samples_per_second": 903.014,
19447
+ "eval_steps_per_second": 14.153,
19448
+ "step": 972000
19449
+ },
19450
+ {
19451
+ "epoch": 10.85,
19452
+ "learning_rate": 1.028925883741203e-05,
19453
+ "loss": 0.18,
19454
+ "step": 972500
19455
+ },
19456
+ {
19457
+ "epoch": 10.85,
19458
+ "learning_rate": 1.0278842882483569e-05,
19459
+ "loss": 0.1797,
19460
+ "step": 973000
19461
+ },
19462
+ {
19463
+ "epoch": 10.85,
19464
+ "eval_loss": 0.17146818339824677,
19465
+ "eval_runtime": 2.5692,
19466
+ "eval_samples_per_second": 894.045,
19467
+ "eval_steps_per_second": 14.012,
19468
+ "step": 973000
19469
+ },
19470
+ {
19471
+ "epoch": 10.86,
19472
+ "learning_rate": 1.026861754253637e-05,
19473
+ "loss": 0.1796,
19474
+ "step": 973500
19475
+ },
19476
+ {
19477
+ "epoch": 10.86,
19478
+ "learning_rate": 1.025858284552612e-05,
19479
+ "loss": 0.1797,
19480
+ "step": 974000
19481
+ },
19482
+ {
19483
+ "epoch": 10.86,
19484
+ "eval_loss": 0.1706797480583191,
19485
+ "eval_runtime": 2.6865,
19486
+ "eval_samples_per_second": 855.008,
19487
+ "eval_steps_per_second": 13.4,
19488
+ "step": 974000
19489
+ },
19490
+ {
19491
+ "epoch": 10.87,
19492
+ "learning_rate": 1.0248738818887307e-05,
19493
+ "loss": 0.1799,
19494
+ "step": 974500
19495
+ },
19496
+ {
19497
+ "epoch": 10.87,
19498
+ "learning_rate": 1.023908548953311e-05,
19499
+ "loss": 0.1799,
19500
+ "step": 975000
19501
+ },
19502
+ {
19503
+ "epoch": 10.87,
19504
+ "eval_loss": 0.1708817481994629,
19505
+ "eval_runtime": 2.5759,
19506
+ "eval_samples_per_second": 891.738,
19507
+ "eval_steps_per_second": 13.976,
19508
+ "step": 975000
19509
+ },
19510
+ {
19511
+ "epoch": 10.88,
19512
+ "learning_rate": 1.0229622883855378e-05,
19513
+ "loss": 0.1798,
19514
+ "step": 975500
19515
+ },
19516
+ {
19517
+ "epoch": 10.89,
19518
+ "learning_rate": 1.02203510277245e-05,
19519
+ "loss": 0.1796,
19520
+ "step": 976000
19521
+ },
19522
+ {
19523
+ "epoch": 10.89,
19524
+ "eval_loss": 0.1709393560886383,
19525
+ "eval_runtime": 2.6094,
19526
+ "eval_samples_per_second": 880.296,
19527
+ "eval_steps_per_second": 13.797,
19528
+ "step": 976000
19529
+ },
19530
+ {
19531
+ "epoch": 10.89,
19532
+ "learning_rate": 1.021126994648939e-05,
19533
+ "loss": 0.1801,
19534
+ "step": 976500
19535
+ },
19536
+ {
19537
+ "epoch": 10.9,
19538
+ "learning_rate": 1.0202379664977364e-05,
19539
+ "loss": 0.1799,
19540
+ "step": 977000
19541
+ },
19542
+ {
19543
+ "epoch": 10.9,
19544
+ "eval_loss": 0.17174768447875977,
19545
+ "eval_runtime": 2.6289,
19546
+ "eval_samples_per_second": 873.739,
19547
+ "eval_steps_per_second": 13.694,
19548
+ "step": 977000
19549
+ },
19550
+ {
19551
+ "epoch": 10.9,
19552
+ "learning_rate": 1.019368020749412e-05,
19553
+ "loss": 0.1797,
19554
+ "step": 977500
19555
+ },
19556
+ {
19557
+ "epoch": 10.91,
19558
+ "learning_rate": 1.018517159782365e-05,
19559
+ "loss": 0.1797,
19560
+ "step": 978000
19561
+ },
19562
+ {
19563
+ "epoch": 10.91,
19564
+ "eval_loss": 0.16800174117088318,
19565
+ "eval_runtime": 2.57,
19566
+ "eval_samples_per_second": 893.767,
19567
+ "eval_steps_per_second": 14.008,
19568
+ "step": 978000
19569
+ },
19570
+ {
19571
+ "epoch": 10.91,
19572
+ "learning_rate": 1.0176853859228149e-05,
19573
+ "loss": 0.1794,
19574
+ "step": 978500
19575
+ },
19576
+ {
19577
+ "epoch": 10.92,
19578
+ "learning_rate": 1.0168727014448004e-05,
19579
+ "loss": 0.1794,
19580
+ "step": 979000
19581
+ },
19582
+ {
19583
+ "epoch": 10.92,
19584
+ "eval_loss": 0.16953879594802856,
19585
+ "eval_runtime": 2.6173,
19586
+ "eval_samples_per_second": 877.629,
19587
+ "eval_steps_per_second": 13.755,
19588
+ "step": 979000
19589
+ },
19590
+ {
19591
+ "epoch": 10.92,
19592
+ "learning_rate": 1.0160791085701714e-05,
19593
+ "loss": 0.1798,
19594
+ "step": 979500
19595
+ },
19596
+ {
19597
+ "epoch": 10.93,
19598
+ "learning_rate": 1.0153046094685783e-05,
19599
+ "loss": 0.1794,
19600
+ "step": 980000
19601
+ },
19602
+ {
19603
+ "epoch": 10.93,
19604
+ "eval_loss": 0.1709355264902115,
19605
+ "eval_runtime": 2.587,
19606
+ "eval_samples_per_second": 887.915,
19607
+ "eval_steps_per_second": 13.916,
19608
+ "step": 980000
19609
  }
19610
  ],
19611
  "max_steps": 1000000,
19612
  "num_train_epochs": 12,
19613
+ "total_flos": 6.869770816498864e+22,
19614
  "trial_name": null,
19615
  "trial_params": null
19616
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a854cf81d57a7e4747d79eeee0e792b9b0db2dfcccddbeaecfbfa4a0ff53eef
3
  size 3311
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebc5b0eb8c3bb3dd69db934b29e5beccddfae1a796cf948830a8b05766d44497
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a854cf81d57a7e4747d79eeee0e792b9b0db2dfcccddbeaecfbfa4a0ff53eef
3
  size 3311